From 1680ac7a5ad578d7acf819912557ceea4b4a5a88 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 7 Jul 2023 16:18:29 -0700 Subject: Input: gameport - use IS_REACHABLE() instead of open-coding it Replace an open-coded preprocessor conditional with an equivalent helper. Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/ZKYLLmsdCH0Gp7TO@google.com Signed-off-by: Dmitry Torokhov --- include/linux/gameport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 0a221e768ea4..07e370113b2b 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -63,7 +63,7 @@ struct gameport_driver { int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mode); void gameport_close(struct gameport *gameport); -#if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE)) +#if IS_REACHABLE(CONFIG_GAMEPORT) void __gameport_register_port(struct gameport *gameport, struct module *owner); /* use a define to avoid include chaining to get THIS_MODULE */ -- cgit v1.2.3 From 8ce49c2a2aa53afde9a20a8ce02b069d3b262af0 Mon Sep 17 00:00:00 2001 From: Deepak Kumar Singh Date: Fri, 7 Jul 2023 03:11:36 +0530 Subject: rpmsg: core: Add signal API support Some transports like Glink support the state notifications between clients using flow control signals similar to serial protocol signals. Local glink client drivers can send and receive flow control status to glink clients running on remote processors. Add APIs to support sending and receiving of flow control status by rpmsg clients. 
Signed-off-by: Deepak Kumar Singh Signed-off-by: Sarannya S Acked-by: Arnaud Pouliquen Link: https://lore.kernel.org/r/1688679698-31274-2-git-send-email-quic_sarannya@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/rpmsg/rpmsg_core.c | 21 +++++++++++++++++++++ drivers/rpmsg/rpmsg_internal.h | 2 ++ include/linux/rpmsg.h | 15 +++++++++++++++ 3 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index 5039df757127..32b550c91d9f 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -330,6 +330,25 @@ int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, } EXPORT_SYMBOL(rpmsg_trysend_offchannel); +/** + * rpmsg_set_flow_control() - request remote to pause/resume transmission + * @ept: the rpmsg endpoint + * @pause: pause transmission + * @dst: destination address of the endpoint + * + * Return: 0 on success and an appropriate error value on failure. + */ +int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst) +{ + if (WARN_ON(!ept)) + return -EINVAL; + if (!ept->ops->set_flow_control) + return -EOPNOTSUPP; + + return ept->ops->set_flow_control(ept, pause, dst); +} +EXPORT_SYMBOL_GPL(rpmsg_set_flow_control); + /** * rpmsg_get_mtu() - get maximum transmission buffer size for sending message. 
* @ept: the rpmsg endpoint @@ -539,6 +558,8 @@ static int rpmsg_dev_probe(struct device *dev) rpdev->ept = ept; rpdev->src = ept->addr; + + ept->flow_cb = rpdrv->flowcontrol; } err = rpdrv->probe(rpdev); diff --git a/drivers/rpmsg/rpmsg_internal.h b/drivers/rpmsg/rpmsg_internal.h index 39b646d0d40d..b950d6f790a3 100644 --- a/drivers/rpmsg/rpmsg_internal.h +++ b/drivers/rpmsg/rpmsg_internal.h @@ -55,6 +55,7 @@ struct rpmsg_device_ops { * @trysendto: see @rpmsg_trysendto(), optional * @trysend_offchannel: see @rpmsg_trysend_offchannel(), optional * @poll: see @rpmsg_poll(), optional + * @set_flow_control: see @rpmsg_set_flow_control(), optional * @get_mtu: see @rpmsg_get_mtu(), optional * * Indirection table for the operations that a rpmsg backend should implement. @@ -75,6 +76,7 @@ struct rpmsg_endpoint_ops { void *data, int len); __poll_t (*poll)(struct rpmsg_endpoint *ept, struct file *filp, poll_table *wait); + int (*set_flow_control)(struct rpmsg_endpoint *ept, bool pause, u32 dst); ssize_t (*get_mtu)(struct rpmsg_endpoint *ept); }; diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 523c98b96cb4..90d8e4475f80 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -64,12 +64,14 @@ struct rpmsg_device { }; typedef int (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32); +typedef int (*rpmsg_flowcontrol_cb_t)(struct rpmsg_device *, void *, bool); /** * struct rpmsg_endpoint - binds a local rpmsg address to its user * @rpdev: rpmsg channel device * @refcount: when this drops to zero, the ept is deallocated * @cb: rx callback handler + * @flow_cb: remote flow control callback handler * @cb_lock: must be taken before accessing/changing @cb * @addr: local rpmsg address * @priv: private data for the driver's use @@ -92,6 +94,7 @@ struct rpmsg_endpoint { struct rpmsg_device *rpdev; struct kref refcount; rpmsg_rx_cb_t cb; + rpmsg_flowcontrol_cb_t flow_cb; struct mutex cb_lock; u32 addr; void *priv; @@ -106,6 +109,7 @@ struct 
rpmsg_endpoint { * @probe: invoked when a matching rpmsg channel (i.e. device) is found * @remove: invoked when the rpmsg channel is removed * @callback: invoked when an inbound message is received on the channel + * @flowcontrol: invoked when remote side flow control request is received */ struct rpmsg_driver { struct device_driver drv; @@ -113,6 +117,7 @@ struct rpmsg_driver { int (*probe)(struct rpmsg_device *dev); void (*remove)(struct rpmsg_device *dev); int (*callback)(struct rpmsg_device *, void *, int, void *, u32); + int (*flowcontrol)(struct rpmsg_device *, void *, bool); }; static inline u16 rpmsg16_to_cpu(struct rpmsg_device *rpdev, __rpmsg16 val) @@ -192,6 +197,8 @@ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept); +int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst); + #else static inline int rpmsg_register_device_override(struct rpmsg_device *rpdev, @@ -316,6 +323,14 @@ static inline ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept) return -ENXIO; } +static inline int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + #endif /* IS_ENABLED(CONFIG_RPMSG) */ /* use a macro to avoid include chaining to get THIS_MODULE */ -- cgit v1.2.3 From f247f08da0ce822de0d6b2feec811dd6d4d599ce Mon Sep 17 00:00:00 2001 From: Siddharth Gupta Date: Fri, 24 Feb 2023 13:17:06 -0800 Subject: remoteproc: core: Export the rproc coredump APIs The remoteproc coredump APIs are currently only part of the internal remoteproc header. This prevents the remoteproc platform drivers from using these APIs when needed. This change moves the rproc_coredump() and rproc_coredump_cleanup() APIs to the linux header and marks them as exported symbols. 
Signed-off-by: Siddharth Gupta Signed-off-by: Gokul krishna Krishnakumar Link: https://lore.kernel.org/r/20230224211707.30916-2-quic_gokukris@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_coredump.c | 2 ++ drivers/remoteproc/remoteproc_internal.h | 4 ---- include/linux/remoteproc.h | 4 ++++ 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c index bc0e1603a7a3..6ede8c0c93ad 100644 --- a/drivers/remoteproc/remoteproc_coredump.c +++ b/drivers/remoteproc/remoteproc_coredump.c @@ -32,6 +32,7 @@ void rproc_coredump_cleanup(struct rproc *rproc) kfree(entry); } } +EXPORT_SYMBOL_GPL(rproc_coredump_cleanup); /** * rproc_coredump_add_segment() - add segment of device memory to coredump @@ -327,6 +328,7 @@ void rproc_coredump(struct rproc *rproc) */ wait_for_completion(&dump_state.dump_done); } +EXPORT_SYMBOL_GPL(rproc_coredump); /** * rproc_coredump_using_sections() - perform coredump using section headers diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h index d4dbb8d1d80c..f62a82d71dfa 100644 --- a/drivers/remoteproc/remoteproc_internal.h +++ b/drivers/remoteproc/remoteproc_internal.h @@ -76,10 +76,6 @@ extern struct class rproc_class; int rproc_init_sysfs(void); void rproc_exit_sysfs(void); -/* from remoteproc_coredump.c */ -void rproc_coredump_cleanup(struct rproc *rproc); -void rproc_coredump(struct rproc *rproc); - #ifdef CONFIG_REMOTEPROC_CDEV void rproc_init_cdev(void); void rproc_exit_cdev(void); diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index fe8978eb69f1..b4795698d8c2 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -690,6 +690,10 @@ int rproc_detach(struct rproc *rproc); int rproc_set_firmware(struct rproc *rproc, const char *fw_name); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); void 
*rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem); + +/* from remoteproc_coredump.c */ +void rproc_coredump_cleanup(struct rproc *rproc); +void rproc_coredump(struct rproc *rproc); void rproc_coredump_using_sections(struct rproc *rproc); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); int rproc_coredump_add_custom_segment(struct rproc *rproc, -- cgit v1.2.3 From b4f78ff746ec5274fffa92fa2a4dc531360b5016 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 14 Jul 2023 22:56:14 +0200 Subject: pwm: Use a consistent name for pwm_chip pointers in the core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most variables of type struct pwm_chip * are named "chip", there are only three outliers called "pc". Change these three to "chip", too, for consistency. Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 28 ++++++++++++++-------------- include/linux/pwm.h | 6 +++--- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 3dacceaef4a9..8c798753c016 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -127,28 +127,28 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label) } struct pwm_device * -of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args) +of_pwm_xlate_with_flags(struct pwm_chip *chip, const struct of_phandle_args *args) { struct pwm_device *pwm; - if (pc->of_pwm_n_cells < 2) + if (chip->of_pwm_n_cells < 2) return ERR_PTR(-EINVAL); /* flags in the third cell are optional */ if (args->args_count < 2) return ERR_PTR(-EINVAL); - if (args->args[0] >= pc->npwm) + if (args->args[0] >= chip->npwm) return ERR_PTR(-EINVAL); - pwm = pwm_request_from_chip(pc, args->args[0], NULL); + pwm = pwm_request_from_chip(chip, args->args[0], NULL); if (IS_ERR(pwm)) return pwm; pwm->args.period = args->args[1]; 
pwm->args.polarity = PWM_POLARITY_NORMAL; - if (pc->of_pwm_n_cells >= 3) { + if (chip->of_pwm_n_cells >= 3) { if (args->args_count > 2 && args->args[2] & PWM_POLARITY_INVERTED) pwm->args.polarity = PWM_POLARITY_INVERSED; } @@ -158,18 +158,18 @@ of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args) EXPORT_SYMBOL_GPL(of_pwm_xlate_with_flags); struct pwm_device * -of_pwm_single_xlate(struct pwm_chip *pc, const struct of_phandle_args *args) +of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args) { struct pwm_device *pwm; - if (pc->of_pwm_n_cells < 1) + if (chip->of_pwm_n_cells < 1) return ERR_PTR(-EINVAL); /* validate that one cell is specified, optionally with flags */ if (args->args_count != 1 && args->args_count != 2) return ERR_PTR(-EINVAL); - pwm = pwm_request_from_chip(pc, 0, NULL); + pwm = pwm_request_from_chip(chip, 0, NULL); if (IS_ERR(pwm)) return pwm; @@ -692,7 +692,7 @@ static struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np, struct pwm_device *pwm = NULL; struct of_phandle_args args; struct device_link *dl; - struct pwm_chip *pc; + struct pwm_chip *chip; int index = 0; int err; @@ -709,16 +709,16 @@ static struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np, return ERR_PTR(err); } - pc = fwnode_to_pwmchip(of_fwnode_handle(args.np)); - if (IS_ERR(pc)) { - if (PTR_ERR(pc) != -EPROBE_DEFER) + chip = fwnode_to_pwmchip(of_fwnode_handle(args.np)); + if (IS_ERR(chip)) { + if (PTR_ERR(chip) != -EPROBE_DEFER) pr_err("%s(): PWM chip not found\n", __func__); - pwm = ERR_CAST(pc); + pwm = ERR_CAST(chip); goto put; } - pwm = pc->of_xlate(pc, &args); + pwm = chip->of_xlate(chip, &args); if (IS_ERR(pwm)) goto put; diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 04ae1d9073a7..d2f9f690a9c1 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -298,7 +298,7 @@ struct pwm_chip { int base; unsigned int npwm; - struct pwm_device * (*of_xlate)(struct pwm_chip 
*pc, + struct pwm_device * (*of_xlate)(struct pwm_chip *chip, const struct of_phandle_args *args); unsigned int of_pwm_n_cells; @@ -395,9 +395,9 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, const char *label); -struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc, +struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip, const struct of_phandle_args *args); -struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc, +struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args); struct pwm_device *pwm_get(struct device *dev, const char *con_id); -- cgit v1.2.3 From 9e70a5e109a4a23367810de09be826c52d27ee2f Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 17 Jul 2023 21:52:06 +0206 Subject: printk: Add per-console suspended state Currently the global @console_suspended is used to determine if consoles are in a suspended state. Its primary purpose is to allow usage of the console_lock when suspended without causing console printing. It is synchronized by the console_lock. Rather than relying on the console_lock to determine suspended state, make it an official per-console state that is set within console->flags. This allows the state to be queried via SRCU. Remove @console_suspended. Console printing will still be avoided when suspended because console_is_usable() returns false when the new suspended flag is set for that console. 
Signed-off-by: John Ogness Reviewed-by: Sergey Senozhatsky Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230717194607.145135-7-john.ogness@linutronix.de --- include/linux/console.h | 3 ++ kernel/printk/printk.c | 74 +++++++++++++++++++++++++++++-------------------- 2 files changed, 47 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index d3195664baa5..7de11c763eb3 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -154,6 +154,8 @@ static inline int con_debug_leave(void) * receiving the printk spam for obvious reasons. * @CON_EXTENDED: The console supports the extended output format of * /dev/kmesg which requires a larger output buffer. + * @CON_SUSPENDED: Indicates if a console is suspended. If true, the + * printing callbacks must not be called. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), @@ -163,6 +165,7 @@ enum cons_flags { CON_ANYTIME = BIT(4), CON_BRL = BIT(5), CON_EXTENDED = BIT(6), + CON_SUSPENDED = BIT(7), }; /** diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 6e853a1441a7..efe577477913 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -86,7 +86,7 @@ EXPORT_SYMBOL(oops_in_progress); static DEFINE_MUTEX(console_mutex); /* - * console_sem protects updates to console->seq and console_suspended, + * console_sem protects updates to console->seq * and also provides serialization for console printing. */ static DEFINE_SEMAPHORE(console_sem); @@ -359,7 +359,7 @@ static bool panic_in_progress(void) * paths in the console code where we end up in places I want * locked without the console semaphore held). 
*/ -static int console_locked, console_suspended; +static int console_locked; /* * Array of consoles built from command line options (console=) @@ -2549,22 +2549,46 @@ MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to hig */ void suspend_console(void) { + struct console *con; + if (!console_suspend_enabled) return; pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); pr_flush(1000, true); - console_lock(); - console_suspended = 1; - up_console_sem(); + + console_list_lock(); + for_each_console(con) + console_srcu_write_flags(con, con->flags | CON_SUSPENDED); + console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. All printing + * contexts must be able to see that they are suspended so that it + * is guaranteed that all printing has stopped when this function + * completes. + */ + synchronize_srcu(&console_srcu); } void resume_console(void) { + struct console *con; + if (!console_suspend_enabled) return; - down_console_sem(); - console_suspended = 0; - console_unlock(); + + console_list_lock(); + for_each_console(con) + console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); + console_list_unlock(); + + /* + * Ensure that all SRCU list walks have completed. All printing + * contexts must be able to see they are no longer suspended so + * that they are guaranteed to wake up and resume printing. 
+ */ + synchronize_srcu(&console_srcu); + pr_flush(1000, true); } @@ -2623,8 +2647,6 @@ void console_lock(void) msleep(1000); down_console_sem(); - if (console_suspended) - return; console_locked = 1; console_may_schedule = 1; } @@ -2645,10 +2667,6 @@ int console_trylock(void) return 0; if (down_trylock_console_sem()) return 0; - if (console_suspended) { - up_console_sem(); - return 0; - } console_locked = 1; console_may_schedule = 0; return 1; @@ -2674,6 +2692,9 @@ static inline bool console_is_usable(struct console *con) if (!(flags & CON_ENABLED)) return false; + if ((flags & CON_SUSPENDED)) + return false; + if (!con->write) return false; @@ -2992,11 +3013,6 @@ void console_unlock(void) bool flushed; u64 next_seq; - if (console_suspended) { - up_console_sem(); - return; - } - /* * Console drivers are called with interrupts disabled, so * @console_may_schedule should be cleared before; however, we may @@ -3726,8 +3742,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre /* * Hold the console_lock to guarantee safe access to - * console->seq and to prevent changes to @console_suspended - * until all consoles have been processed. + * console->seq. */ console_lock(); @@ -3735,6 +3750,11 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre for_each_console_srcu(c) { if (con && con != c) continue; + /* + * If consoles are not usable, it cannot be expected + * that they make forward progress, so only increment + * @diff for usable consoles. + */ if (!console_is_usable(c)) continue; printk_seq = c->seq; @@ -3743,18 +3763,12 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre } console_srcu_read_unlock(cookie); - /* - * If consoles are suspended, it cannot be expected that they - * make forward progress, so timeout immediately. @diff is - * still used to return a valid flush status. 
- */ - if (console_suspended) - remaining = 0; - else if (diff != last_diff && reset_on_progress) + if (diff != last_diff && reset_on_progress) remaining = timeout_ms; console_unlock(); + /* Note: @diff is 0 if there are no usable consoles. */ if (diff == 0 || remaining == 0) break; @@ -3788,7 +3802,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre * printer has been seen to make some forward progress. * * Context: Process context. May sleep while acquiring console lock. - * Return: true if all enabled printers are caught up. + * Return: true if all usable printers are caught up. */ static bool pr_flush(int timeout_ms, bool reset_on_progress) { -- cgit v1.2.3 From 481461f5109919babbb393d6f68002936b8e2493 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 16 Jul 2023 19:15:54 +0900 Subject: linux/export.h: make linux/export.h independent of CONFIG_MODULES Currently, all files with EXPORT_SYMBOL() are rebuilt when CONFIG_MODULES is flipped due to linux/export.h depending on CONFIG_MODULES. Now that modpost can make a final decision about export symbols, linux/export.h does not need to make EXPORT_SYMBOL() no-op. Instead, modpost can skip emitting KSYMTAB when CONFIG_MODULES is unset. This commit will reduce the number of recompilation when CONFIG_MODULES is toggled. 
Signed-off-by: Masahiro Yamada --- include/linux/export.h | 4 ++-- scripts/Makefile.modpost | 1 + scripts/mod/modpost.c | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index beed8387e0a4..9911508a9604 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -50,7 +50,7 @@ extern struct module __this_module; __EXPORT_SYMBOL_REF(sym) ASM_NL \ .previous -#if !defined(CONFIG_MODULES) || defined(__DISABLE_EXPORTS) +#if defined(__DISABLE_EXPORTS) /* * Allow symbol exports to be disabled completely so that C code may @@ -75,7 +75,7 @@ extern struct module __this_module; __ADDRESSABLE(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) -#endif /* CONFIG_MODULES */ +#endif #ifdef DEFAULT_SYMBOL_NAMESPACE #define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE)) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 39472e834b63..739402f45509 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -41,6 +41,7 @@ include $(srctree)/scripts/Kbuild.include MODPOST = scripts/mod/modpost modpost-args = \ + $(if $(CONFIG_MODULES),-M) \ $(if $(CONFIG_MODVERSIONS),-m) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index b29b29707f10..8227641dd087 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -24,6 +24,7 @@ #include "../../include/linux/license.h" #include "../../include/linux/module_symbol.h" +static bool module_enabled; /* Are we using CONFIG_MODVERSIONS? */ static bool modversions; /* Is CONFIG_MODULE_SRCVERSION_ALL set? 
*/ @@ -1242,7 +1243,7 @@ static void check_section_mismatch(struct module *mod, struct elf_info *elf, const char *tosec = sec_name(elf, get_secindex(elf, sym)); const struct sectioncheck *mismatch; - if (elf->export_symbol_secndx == fsecndx) { + if (module_enabled && elf->export_symbol_secndx == fsecndx) { check_export_symbol(mod, elf, faddr, tosec, sym); return; } @@ -2272,7 +2273,7 @@ int main(int argc, char **argv) LIST_HEAD(dump_lists); struct dump_list *dl, *dl2; - while ((opt = getopt(argc, argv, "ei:mnT:to:au:WwENd:")) != -1) { + while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:")) != -1) { switch (opt) { case 'e': external_module = true; @@ -2282,6 +2283,9 @@ int main(int argc, char **argv) dl->file = optarg; list_add_tail(&dl->list, &dump_lists); break; + case 'M': + module_enabled = true; + break; case 'm': modversions = true; break; -- cgit v1.2.3 From db2d6038c5e795cab4f0a8d3e86b4f7e33338629 Mon Sep 17 00:00:00 2001 From: Benjamin Bara Date: Sat, 15 Jul 2023 09:53:25 +0200 Subject: kernel/reboot: Add device to sys_off_handler If the dev is known (e.g. a devm-based sys_off_handler is used), it can be passed to the handler's callback to have it available there. Otherwise, cb_data might be set to the dev in most of the cases. Reviewed-by: Dmitry Osipenko Signed-off-by: Benjamin Bara Link: https://lore.kernel.org/r/20230327-tegra-pmic-reboot-v7-3-18699d5dcd76@skidata.com Signed-off-by: Lee Jones --- include/linux/reboot.h | 3 +++ kernel/reboot.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 2b6bb593be5b..c4cc3b89ced1 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -129,11 +129,14 @@ enum sys_off_mode { * @cb_data: User's callback data. * @cmd: Command string. Currently used only by the sys-off restart mode, * NULL otherwise. + * @dev: Device of the sys-off handler. Only if known (devm_register_*), + * NULL otherwise. 
*/ struct sys_off_data { int mode; void *cb_data; const char *cmd; + struct device *dev; }; struct sys_off_handler * diff --git a/kernel/reboot.c b/kernel/reboot.c index 6ebef11c8876..395a0ea3c7a8 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -55,6 +55,7 @@ struct sys_off_handler { enum sys_off_mode mode; bool blocking; void *list; + struct device *dev; }; /* @@ -324,6 +325,7 @@ static int sys_off_notify(struct notifier_block *nb, data.cb_data = handler->cb_data; data.mode = mode; data.cmd = cmd; + data.dev = handler->dev; return handler->sys_off_cb(&data); } @@ -511,6 +513,7 @@ int devm_register_sys_off_handler(struct device *dev, handler = register_sys_off_handler(mode, priority, callback, cb_data); if (IS_ERR(handler)) return PTR_ERR(handler); + handler->dev = dev; return devm_add_action_or_reset(dev, devm_unregister_sys_off_handler, handler); -- cgit v1.2.3 From 687fe7dfb736b03ab820d172ea5dbfc1ec447135 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 23 Jul 2023 22:30:18 -0700 Subject: Input: tca6416-keypad - always expect proper IRQ number in i2c client Remove option having i2c client contain raw gpio number instead of proper IRQ number. There are no users of this facility in mainline and it will allow cleaning up the driver code with regard to wakeup handling, etc. 
Link: https://lore.kernel.org/r/20230724053024.352054-1-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/tca6416-keypad.c | 27 ++++++++++----------------- include/linux/tca6416_keypad.h | 1 - 2 files changed, 10 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/tca6416-keypad.c b/drivers/input/keyboard/tca6416-keypad.c index 2f745cabf4f2..01bc0b881188 100644 --- a/drivers/input/keyboard/tca6416-keypad.c +++ b/drivers/input/keyboard/tca6416-keypad.c @@ -148,7 +148,7 @@ static int tca6416_keys_open(struct input_dev *dev) if (chip->use_polling) schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100)); else - enable_irq(chip->irqnum); + enable_irq(chip->client->irq); return 0; } @@ -160,7 +160,7 @@ static void tca6416_keys_close(struct input_dev *dev) if (chip->use_polling) cancel_delayed_work_sync(&chip->dwork); else - disable_irq(chip->irqnum); + disable_irq(chip->client->irq); } static int tca6416_setup_registers(struct tca6416_keypad_chip *chip) @@ -266,12 +266,7 @@ static int tca6416_keypad_probe(struct i2c_client *client) goto fail1; if (!chip->use_polling) { - if (pdata->irq_is_gpio) - chip->irqnum = gpio_to_irq(client->irq); - else - chip->irqnum = client->irq; - - error = request_threaded_irq(chip->irqnum, NULL, + error = request_threaded_irq(client->irq, NULL, tca6416_keys_isr, IRQF_TRIGGER_FALLING | IRQF_ONESHOT | IRQF_NO_AUTOEN, @@ -279,7 +274,7 @@ static int tca6416_keypad_probe(struct i2c_client *client) if (error) { dev_dbg(&client->dev, "Unable to claim irq %d; error %d\n", - chip->irqnum, error); + client->irq, error); goto fail1; } } @@ -298,8 +293,8 @@ static int tca6416_keypad_probe(struct i2c_client *client) fail2: if (!chip->use_polling) { - free_irq(chip->irqnum, chip); - enable_irq(chip->irqnum); + free_irq(client->irq, chip); + enable_irq(client->irq); } fail1: input_free_device(input); @@ -312,8 +307,8 @@ static void tca6416_keypad_remove(struct i2c_client 
*client) struct tca6416_keypad_chip *chip = i2c_get_clientdata(client); if (!chip->use_polling) { - free_irq(chip->irqnum, chip); - enable_irq(chip->irqnum); + free_irq(client->irq, chip); + enable_irq(client->irq); } input_unregister_device(chip->input); @@ -323,10 +318,9 @@ static void tca6416_keypad_remove(struct i2c_client *client) static int tca6416_keypad_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - struct tca6416_keypad_chip *chip = i2c_get_clientdata(client); if (device_may_wakeup(dev)) - enable_irq_wake(chip->irqnum); + enable_irq_wake(client->irq); return 0; } @@ -334,10 +328,9 @@ static int tca6416_keypad_suspend(struct device *dev) static int tca6416_keypad_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - struct tca6416_keypad_chip *chip = i2c_get_clientdata(client); if (device_may_wakeup(dev)) - disable_irq_wake(chip->irqnum); + disable_irq_wake(client->irq); return 0; } diff --git a/include/linux/tca6416_keypad.h b/include/linux/tca6416_keypad.h index b0d36a9934cc..5cf6f6f82aa7 100644 --- a/include/linux/tca6416_keypad.h +++ b/include/linux/tca6416_keypad.h @@ -25,7 +25,6 @@ struct tca6416_keys_platform_data { unsigned int rep:1; /* enable input subsystem auto repeat */ uint16_t pinmask; uint16_t invert; - int irq_is_gpio; int use_polling; /* use polling if Interrupt is not connected*/ }; #endif -- cgit v1.2.3 From 63b93099359eca37c11e4d5db5ea2c3a375f6026 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 29 Jul 2023 23:17:46 +0300 Subject: ata: libata: fix parameter type of ata_deadline() ata_deadline() passes its 'unsigned long timeout_msecs' parameter verbatim to msecs_to_jiffies() which takes just 'unsigned int' -- eliminate unneeded implicit cast... 
Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 820f7a3a2749..d9edb3d62a42 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1876,7 +1876,7 @@ static inline int ata_check_ready(u8 status) } static inline unsigned long ata_deadline(unsigned long from_jiffies, - unsigned long timeout_msecs) + unsigned int timeout_msecs) { return from_jiffies + msecs_to_jiffies(timeout_msecs); } -- cgit v1.2.3 From 84abed36d7de7145d393f9d5d05b36717e0cb49d Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 29 Jul 2023 23:17:47 +0300 Subject: ata: libata-core: fix parameter types of ata_wait_register() ata_wait_register() passes its 'unsigned long {interval|timeout}' params verbatim to ata_{msleep|deadline}() that just take 'unsigned int' param for the time intervals in ms -- eliminate unneeded implicit casts... Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 2 +- include/linux/libata.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 04db0f2c683a..54cc342c0b4f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6684,7 +6684,7 @@ EXPORT_SYMBOL_GPL(ata_msleep); * The final register value. 
*/ u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val, - unsigned long interval, unsigned long timeout) + unsigned int interval, unsigned int timeout) { unsigned long deadline; u32 tmp; diff --git a/include/linux/libata.h b/include/linux/libata.h index d9edb3d62a42..4772f64af734 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1116,7 +1116,7 @@ static inline void ata_sas_port_resume(struct ata_port *ap) extern int ata_ratelimit(void); extern void ata_msleep(struct ata_port *ap, unsigned int msecs); extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, - u32 val, unsigned long interval, unsigned long timeout); + u32 val, unsigned int interval, unsigned int timeout); extern int atapi_cmd_type(u8 opcode); extern unsigned int ata_pack_xfermask(unsigned int pio_mask, unsigned int mwdma_mask, -- cgit v1.2.3 From d14d41cc5aaef138face9d5a145b460e2b63697a Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 29 Jul 2023 23:17:49 +0300 Subject: ata: fix debounce timings type sata_deb_timing_{hotplug|long|normal}[] store 'unsigned long' debounce timeouts in ms, while sata_link_debounce() eventually uses those timeouts by calling ata_{deadline|msleep}() which take just 'unsigned int'. Change the debounce timeout table element's type to 'unsigned int' -- all these timeouts happily fit into 'unsigned int'... 
Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 2 +- drivers/ata/ahci_qoriq.c | 2 +- drivers/ata/ahci_xgene.c | 2 +- drivers/ata/libahci.c | 2 +- drivers/ata/libata-core.c | 4 ++-- drivers/ata/libata-sata.c | 16 ++++++++-------- drivers/ata/libata-sff.c | 2 +- drivers/ata/sata_highbank.c | 2 +- drivers/ata/sata_inic162x.c | 2 +- drivers/ata/sata_mv.c | 2 +- drivers/ata/sata_nv.c | 2 +- include/linux/libata.h | 20 ++++++++++---------- 12 files changed, 29 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index addba109406b..02503e903e4a 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -807,7 +807,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class, static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { - const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); + const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context); struct ata_port *ap = link->ap; struct ahci_port_priv *pp = ap->private_data; struct ahci_host_priv *hpriv = ap->host->private_data; diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c index 7bb9ad40605e..b1a4e57578e2 100644 --- a/drivers/ata/ahci_qoriq.c +++ b/drivers/ata/ahci_qoriq.c @@ -88,7 +88,7 @@ MODULE_DEVICE_TABLE(acpi, ahci_qoriq_acpi_match); static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { - const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); + const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context); void __iomem *port_mmio = ahci_port_base(link->ap); u32 px_cmd, px_is, px_val; struct ata_port *ap = link->ap; diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index f5deaf648663..8e88c86a2a78 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -350,7 +350,7 @@ static void xgene_ahci_set_phy_cfg(struct 
xgene_ahci_context *ctx, int channel) static int xgene_ahci_do_hardreset(struct ata_link *link, unsigned long deadline, bool *online) { - const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); + const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context); struct ata_port *ap = link->ap; struct ahci_host_priv *hpriv = ap->host->private_data; struct xgene_ahci_context *ctx = hpriv->plat_data; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 06aec35f88f2..ad2bfcbff3bc 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1587,7 +1587,7 @@ static int ahci_pmp_retry_softreset(struct ata_link *link, unsigned int *class, int ahci_do_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline, bool *online) { - const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); + const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context); struct ata_port *ap = link->ap; struct ahci_port_priv *pp = ap->private_data; struct ahci_host_priv *hpriv = ap->host->private_data; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 54cc342c0b4f..079ec8d0860f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3782,7 +3782,7 @@ int ata_std_prereset(struct ata_link *link, unsigned long deadline) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; - const unsigned long *timing = sata_ehc_deb_timing(ehc); + const unsigned int *timing = sata_ehc_deb_timing(ehc); int rc; /* if we're about to do hardreset, nothing more to do */ @@ -3824,7 +3824,7 @@ EXPORT_SYMBOL_GPL(ata_std_prereset); int sata_std_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { - const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); + const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context); bool online; int rc; diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 85e279a12f62..5d393432fa06 
100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -19,11 +19,11 @@ #include "libata-transport.h" /* debounce timing parameters in msecs { interval, duration, timeout } */ -const unsigned long sata_deb_timing_normal[] = { 5, 100, 2000 }; +const unsigned int sata_deb_timing_normal[] = { 5, 100, 2000 }; EXPORT_SYMBOL_GPL(sata_deb_timing_normal); -const unsigned long sata_deb_timing_hotplug[] = { 25, 500, 2000 }; +const unsigned int sata_deb_timing_hotplug[] = { 25, 500, 2000 }; EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug); -const unsigned long sata_deb_timing_long[] = { 100, 2000, 5000 }; +const unsigned int sata_deb_timing_long[] = { 100, 2000, 5000 }; EXPORT_SYMBOL_GPL(sata_deb_timing_long); /** @@ -232,11 +232,11 @@ EXPORT_SYMBOL_GPL(ata_tf_from_fis); * RETURNS: * 0 on success, -errno on failure. */ -int sata_link_debounce(struct ata_link *link, const unsigned long *params, +int sata_link_debounce(struct ata_link *link, const unsigned int *params, unsigned long deadline) { - unsigned long interval = params[0]; - unsigned long duration = params[1]; + unsigned int interval = params[0]; + unsigned int duration = params[1]; unsigned long last_jiffies, t; u32 last, cur; int rc; @@ -295,7 +295,7 @@ EXPORT_SYMBOL_GPL(sata_link_debounce); * RETURNS: * 0 on success, -errno on failure. */ -int sata_link_resume(struct ata_link *link, const unsigned long *params, +int sata_link_resume(struct ata_link *link, const unsigned int *params, unsigned long deadline) { int tries = ATA_LINK_RESUME_TRIES; @@ -528,7 +528,7 @@ EXPORT_SYMBOL_GPL(sata_set_spd); * RETURNS: * 0 on success, -errno otherwise. 
*/ -int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, +int sata_link_hardreset(struct ata_link *link, const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)) { diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 9d28badfe41d..ac55dfc2d85f 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1971,7 +1971,7 @@ int sata_sff_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { struct ata_eh_context *ehc = &link->eh_context; - const unsigned long *timing = sata_ehc_deb_timing(ehc); + const unsigned int *timing = sata_ehc_deb_timing(ehc); bool online; int rc; diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index 7a6d41b7a02d..63ef7bb073ce 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -385,7 +385,7 @@ static int highbank_initialize_phys(struct device *dev, void __iomem *addr) static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { - static const unsigned long timing[] = { 5, 100, 500}; + static const unsigned int timing[] = { 5, 100, 500}; struct ata_port *ap = link->ap; struct ahci_port_priv *pp = ap->private_data; struct ahci_host_priv *hpriv = ap->host->private_data; diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index 2c8c78ed86c1..db9c255dc9f2 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -619,7 +619,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class, struct ata_port *ap = link->ap; void __iomem *port_base = inic_port_base(ap); void __iomem *idma_ctl = port_base + PORT_IDMA_CTL; - const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); + const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context); int rc; /* hammer it into sane state */ diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index d404e631d152..41c107e15c40 
100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -3633,7 +3633,7 @@ static int mv_hardreset(struct ata_link *link, unsigned int *class, /* Workaround for errata FEr SATA#10 (part 2) */ do { - const unsigned long *timing = + const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context); rc = sata_link_hardreset(link, timing, deadline + extra, diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index abf5651c87ab..0a0cee755bde 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -1529,7 +1529,7 @@ static int nv_hardreset(struct ata_link *link, unsigned int *class, sata_link_hardreset(link, sata_deb_timing_hotplug, deadline, NULL, NULL); else { - const unsigned long *timing = sata_ehc_deb_timing(ehc); + const unsigned int *timing = sata_ehc_deb_timing(ehc); int rc; if (!(ehc->i.flags & ATA_EHI_QUIET)) diff --git a/include/linux/libata.h b/include/linux/libata.h index 4772f64af734..8d510fb00591 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1166,11 +1166,11 @@ extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port * * SATA specific code - drivers/ata/libata-sata.c */ #ifdef CONFIG_SATA_HOST -extern const unsigned long sata_deb_timing_normal[]; -extern const unsigned long sata_deb_timing_hotplug[]; -extern const unsigned long sata_deb_timing_long[]; +extern const unsigned int sata_deb_timing_normal[]; +extern const unsigned int sata_deb_timing_hotplug[]; +extern const unsigned int sata_deb_timing_long[]; -static inline const unsigned long * +static inline const unsigned int * sata_ehc_deb_timing(struct ata_eh_context *ehc) { if (ehc->i.flags & ATA_EHI_HOTPLUGGED) @@ -1185,14 +1185,14 @@ extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); extern int sata_link_hardreset(struct ata_link *link, - const unsigned long *timing, unsigned long deadline, + 
const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); -extern int sata_link_resume(struct ata_link *link, const unsigned long *params, +extern int sata_link_resume(struct ata_link *link, const unsigned int *params, unsigned long deadline); extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else -static inline const unsigned long * +static inline const unsigned int * sata_ehc_deb_timing(struct ata_eh_context *ehc) { return NULL; @@ -1212,7 +1212,7 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } static inline int sata_link_hardreset(struct ata_link *link, - const unsigned long *timing, + const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)) @@ -1222,7 +1222,7 @@ static inline int sata_link_hardreset(struct ata_link *link, return -EOPNOTSUPP; } static inline int sata_link_resume(struct ata_link *link, - const unsigned long *params, + const unsigned int *params, unsigned long deadline) { return -EOPNOTSUPP; @@ -1234,7 +1234,7 @@ static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link) static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, - const unsigned long *params, unsigned long deadline); + const unsigned int *params, unsigned long deadline); extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); -- cgit v1.2.3 From ff8072d589dcff7c1f0345a6ec98b5fc1e9ee2a1 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:12 +0200 Subject: ata: libata: remove references to non-existing error_handler() With commit 65a15d6560df ("scsi: ipr: Remove SATA support") all libata 
drivers now have the error_handler() callback provided, so we can stop checking for non-existing error_handler callback. Signed-off-by: Hannes Reinecke [niklas: fixed review comments, rebased, solved conflicts during rebase, fixed bug that unconditionally dumped all QCs, removed the now unused function ata_dump_status(), removed the now unreachable failure paths in atapi_qc_complete(), removed the non-EH function to request ATAPI sense] Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Jason Yan Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 209 +++++++++++++++++++--------------------------- drivers/ata/libata-eh.c | 152 ++++++++++++++------------------- drivers/ata/libata-sata.c | 7 +- drivers/ata/libata-scsi.c | 161 +++-------------------------------- drivers/ata/libata-sff.c | 30 +++---- include/linux/libata.h | 2 +- 6 files changed, 170 insertions(+), 391 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 079ec8d0860f..cc59d3158e1d 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1586,13 +1586,11 @@ static unsigned ata_exec_internal_sg(struct ata_device *dev, } } - if (ap->ops->error_handler) - ata_eh_release(ap); + ata_eh_release(ap); rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout)); - if (ap->ops->error_handler) - ata_eh_acquire(ap); + ata_eh_acquire(ap); ata_sff_flush_pio_task(ap); @@ -1607,10 +1605,7 @@ static unsigned ata_exec_internal_sg(struct ata_device *dev, if (qc->flags & ATA_QCFLAG_ACTIVE) { qc->err_mask |= AC_ERR_TIMEOUT; - if (ap->ops->error_handler) - ata_port_freeze(ap); - else - ata_qc_complete(qc); + ata_port_freeze(ap); ata_dev_warn(dev, "qc timeout after %u msecs (cmd 0x%x)\n", timeout, command); @@ -4874,126 +4869,103 @@ static void ata_verify_xfer(struct ata_queued_cmd *qc) void ata_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_device 
*dev = qc->dev; + struct ata_eh_info *ehi = &dev->link->eh_info; /* Trigger the LED (if available) */ ledtrig_disk_activity(!!(qc->tf.flags & ATA_TFLAG_WRITE)); - /* XXX: New EH and old EH use different mechanisms to - * synchronize EH with regular execution path. - * - * In new EH, a qc owned by EH is marked with ATA_QCFLAG_EH. - * Normal execution path is responsible for not accessing a - * qc owned by EH. libata core enforces the rule by returning NULL - * from ata_qc_from_tag() for qcs owned by EH. + /* + * In order to synchronize EH with the regular execution path, a qc that + * is owned by EH is marked with ATA_QCFLAG_EH. * - * Old EH depends on ata_qc_complete() nullifying completion - * requests if ATA_QCFLAG_EH_SCHEDULED is set. Old EH does - * not synchronize with interrupt handler. Only PIO task is - * taken care of. + * The normal execution path is responsible for not accessing a qc owned + * by EH. libata core enforces the rule by returning NULL from + * ata_qc_from_tag() for qcs owned by EH. */ - if (ap->ops->error_handler) { - struct ata_device *dev = qc->dev; - struct ata_eh_info *ehi = &dev->link->eh_info; - - if (unlikely(qc->err_mask)) - qc->flags |= ATA_QCFLAG_EH; + if (unlikely(qc->err_mask)) + qc->flags |= ATA_QCFLAG_EH; - /* - * Finish internal commands without any further processing - * and always with the result TF filled. - */ - if (unlikely(ata_tag_internal(qc->tag))) { - fill_result_tf(qc); - trace_ata_qc_complete_internal(qc); - __ata_qc_complete(qc); - return; - } + /* + * Finish internal commands without any further processing and always + * with the result TF filled. + */ + if (unlikely(ata_tag_internal(qc->tag))) { + fill_result_tf(qc); + trace_ata_qc_complete_internal(qc); + __ata_qc_complete(qc); + return; + } - /* - * Non-internal qc has failed. Fill the result TF and - * summon EH. 
- */ - if (unlikely(qc->flags & ATA_QCFLAG_EH)) { - fill_result_tf(qc); - trace_ata_qc_complete_failed(qc); - ata_qc_schedule_eh(qc); - return; - } + /* Non-internal qc has failed. Fill the result TF and summon EH. */ + if (unlikely(qc->flags & ATA_QCFLAG_EH)) { + fill_result_tf(qc); + trace_ata_qc_complete_failed(qc); + ata_qc_schedule_eh(qc); + return; + } - WARN_ON_ONCE(ata_port_is_frozen(ap)); + WARN_ON_ONCE(ata_port_is_frozen(ap)); - /* read result TF if requested */ - if (qc->flags & ATA_QCFLAG_RESULT_TF) - fill_result_tf(qc); + /* read result TF if requested */ + if (qc->flags & ATA_QCFLAG_RESULT_TF) + fill_result_tf(qc); - trace_ata_qc_complete_done(qc); + trace_ata_qc_complete_done(qc); + /* + * For CDL commands that completed without an error, check if we have + * sense data (ATA_SENSE is set). If we do, then the command may have + * been aborted by the device due to a limit timeout using the policy + * 0xD. For these commands, invoke EH to get the command sense data. + */ + if (qc->result_tf.status & ATA_SENSE && + ((ata_is_ncq(qc->tf.protocol) && + dev->flags & ATA_DFLAG_CDL_ENABLED) || + (!ata_is_ncq(qc->tf.protocol) && + ata_id_sense_reporting_enabled(dev->id)))) { /* - * For CDL commands that completed without an error, check if - * we have sense data (ATA_SENSE is set). If we do, then the - * command may have been aborted by the device due to a limit - * timeout using the policy 0xD. For these commands, invoke EH - * to get the command sense data. + * Tell SCSI EH to not overwrite scmd->result even if this + * command is finished with result SAM_STAT_GOOD. */ - if (qc->result_tf.status & ATA_SENSE && - ((ata_is_ncq(qc->tf.protocol) && - dev->flags & ATA_DFLAG_CDL_ENABLED) || - (!ata_is_ncq(qc->tf.protocol) && - ata_id_sense_reporting_enabled(dev->id)))) { - /* - * Tell SCSI EH to not overwrite scmd->result even if - * this command is finished with result SAM_STAT_GOOD. 
- */ - qc->scsicmd->flags |= SCMD_FORCE_EH_SUCCESS; - qc->flags |= ATA_QCFLAG_EH_SUCCESS_CMD; - ehi->dev_action[dev->devno] |= ATA_EH_GET_SUCCESS_SENSE; - - /* - * set pending so that ata_qc_schedule_eh() does not - * trigger fast drain, and freeze the port. - */ - ap->pflags |= ATA_PFLAG_EH_PENDING; - ata_qc_schedule_eh(qc); - return; - } + qc->scsicmd->flags |= SCMD_FORCE_EH_SUCCESS; + qc->flags |= ATA_QCFLAG_EH_SUCCESS_CMD; + ehi->dev_action[dev->devno] |= ATA_EH_GET_SUCCESS_SENSE; - /* Some commands need post-processing after successful - * completion. + /* + * set pending so that ata_qc_schedule_eh() does not trigger + * fast drain, and freeze the port. */ - switch (qc->tf.command) { - case ATA_CMD_SET_FEATURES: - if (qc->tf.feature != SETFEATURES_WC_ON && - qc->tf.feature != SETFEATURES_WC_OFF && - qc->tf.feature != SETFEATURES_RA_ON && - qc->tf.feature != SETFEATURES_RA_OFF) - break; - fallthrough; - case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */ - case ATA_CMD_SET_MULTI: /* multi_count changed */ - /* revalidate device */ - ehi->dev_action[dev->devno] |= ATA_EH_REVALIDATE; - ata_port_schedule_eh(ap); - break; + ap->pflags |= ATA_PFLAG_EH_PENDING; + ata_qc_schedule_eh(qc); + return; + } - case ATA_CMD_SLEEP: - dev->flags |= ATA_DFLAG_SLEEPING; + /* Some commands need post-processing after successful completion. 
*/ + switch (qc->tf.command) { + case ATA_CMD_SET_FEATURES: + if (qc->tf.feature != SETFEATURES_WC_ON && + qc->tf.feature != SETFEATURES_WC_OFF && + qc->tf.feature != SETFEATURES_RA_ON && + qc->tf.feature != SETFEATURES_RA_OFF) break; - } - - if (unlikely(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) - ata_verify_xfer(qc); + fallthrough; + case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */ + case ATA_CMD_SET_MULTI: /* multi_count changed */ + /* revalidate device */ + ehi->dev_action[dev->devno] |= ATA_EH_REVALIDATE; + ata_port_schedule_eh(ap); + break; - __ata_qc_complete(qc); - } else { - if (qc->flags & ATA_QCFLAG_EH_SCHEDULED) - return; + case ATA_CMD_SLEEP: + dev->flags |= ATA_DFLAG_SLEEPING; + break; + } - /* read result TF if failed or requested */ - if (qc->err_mask || qc->flags & ATA_QCFLAG_RESULT_TF) - fill_result_tf(qc); + if (unlikely(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) + ata_verify_xfer(qc); - __ata_qc_complete(qc); - } + __ata_qc_complete(qc); } EXPORT_SYMBOL_GPL(ata_qc_complete); @@ -5039,11 +5011,8 @@ void ata_qc_issue(struct ata_queued_cmd *qc) struct ata_link *link = qc->dev->link; u8 prot = qc->tf.protocol; - /* Make sure only one non-NCQ command is outstanding. The - * check is skipped for old EH because it reuses active qc to - * request ATAPI sense. - */ - WARN_ON_ONCE(ap->ops->error_handler && ata_tag_valid(link->active_tag)); + /* Make sure only one non-NCQ command is outstanding. 
*/ + WARN_ON_ONCE(ata_tag_valid(link->active_tag)); if (ata_is_ncq(prot)) { WARN_ON_ONCE(link->sactive & (1 << qc->hw_tag)); @@ -5917,15 +5886,9 @@ void __ata_port_probe(struct ata_port *ap) int ata_port_probe(struct ata_port *ap) { - int rc = 0; - - if (ap->ops->error_handler) { - __ata_port_probe(ap); - ata_port_wait_eh(ap); - } else { - rc = ata_bus_probe(ap); - } - return rc; + __ata_port_probe(ap); + ata_port_wait_eh(ap); + return 0; } @@ -6130,9 +6093,6 @@ static void ata_port_detach(struct ata_port *ap) struct ata_link *link; struct ata_device *dev; - if (!ap->ops->error_handler) - goto skip_eh; - /* tell EH we're leaving & flush EH */ spin_lock_irqsave(ap->lock, flags); ap->pflags |= ATA_PFLAG_UNLOADING; @@ -6148,7 +6108,6 @@ static void ata_port_detach(struct ata_port *ap) cancel_delayed_work_sync(&ap->hotplug_task); cancel_delayed_work_sync(&ap->scsi_rescan_task); - skip_eh: /* clean up zpodd on port removal */ ata_for_each_link(link, ap, HOST_FIRST) { ata_for_each_dev(dev, link, ALL) { diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 0e2acca36c10..159ba6ba19eb 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -571,13 +571,10 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, /* make sure sff pio task is not running */ ata_sff_flush_pio_task(ap); - if (!ap->ops->error_handler) - return; - /* synchronize with host lock and sort out timeouts */ /* - * For new EH, all qcs are finished in one of three ways - + * For EH, all qcs are finished in one of three ways - * normal completion, error completion, and SCSI timeout. * Both completions can race against SCSI timeout. When normal * completion wins, the qc never reaches EH. 
When error @@ -659,94 +656,87 @@ EXPORT_SYMBOL(ata_scsi_cmd_error_handler); void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) { unsigned long flags; + struct ata_link *link; - /* invoke error handler */ - if (ap->ops->error_handler) { - struct ata_link *link; - - /* acquire EH ownership */ - ata_eh_acquire(ap); + /* acquire EH ownership */ + ata_eh_acquire(ap); repeat: - /* kill fast drain timer */ - del_timer_sync(&ap->fastdrain_timer); + /* kill fast drain timer */ + del_timer_sync(&ap->fastdrain_timer); - /* process port resume request */ - ata_eh_handle_port_resume(ap); + /* process port resume request */ + ata_eh_handle_port_resume(ap); - /* fetch & clear EH info */ - spin_lock_irqsave(ap->lock, flags); + /* fetch & clear EH info */ + spin_lock_irqsave(ap->lock, flags); - ata_for_each_link(link, ap, HOST_FIRST) { - struct ata_eh_context *ehc = &link->eh_context; - struct ata_device *dev; + ata_for_each_link(link, ap, HOST_FIRST) { + struct ata_eh_context *ehc = &link->eh_context; + struct ata_device *dev; - memset(&link->eh_context, 0, sizeof(link->eh_context)); - link->eh_context.i = link->eh_info; - memset(&link->eh_info, 0, sizeof(link->eh_info)); + memset(&link->eh_context, 0, sizeof(link->eh_context)); + link->eh_context.i = link->eh_info; + memset(&link->eh_info, 0, sizeof(link->eh_info)); - ata_for_each_dev(dev, link, ENABLED) { - int devno = dev->devno; + ata_for_each_dev(dev, link, ENABLED) { + int devno = dev->devno; - ehc->saved_xfer_mode[devno] = dev->xfer_mode; - if (ata_ncq_enabled(dev)) - ehc->saved_ncq_enabled |= 1 << devno; - } + ehc->saved_xfer_mode[devno] = dev->xfer_mode; + if (ata_ncq_enabled(dev)) + ehc->saved_ncq_enabled |= 1 << devno; } + } - ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; - ap->pflags &= ~ATA_PFLAG_EH_PENDING; - ap->excl_link = NULL; /* don't maintain exclusion over EH */ + ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; + ap->pflags &= ~ATA_PFLAG_EH_PENDING; + ap->excl_link = NULL; /* don't maintain 
exclusion over EH */ - spin_unlock_irqrestore(ap->lock, flags); + spin_unlock_irqrestore(ap->lock, flags); - /* invoke EH, skip if unloading or suspended */ - if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) - ap->ops->error_handler(ap); - else { - /* if unloading, commence suicide */ - if ((ap->pflags & ATA_PFLAG_UNLOADING) && - !(ap->pflags & ATA_PFLAG_UNLOADED)) - ata_eh_unload(ap); - ata_eh_finish(ap); - } + /* invoke EH, skip if unloading or suspended */ + if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) + ap->ops->error_handler(ap); + else { + /* if unloading, commence suicide */ + if ((ap->pflags & ATA_PFLAG_UNLOADING) && + !(ap->pflags & ATA_PFLAG_UNLOADED)) + ata_eh_unload(ap); + ata_eh_finish(ap); + } - /* process port suspend request */ - ata_eh_handle_port_suspend(ap); + /* process port suspend request */ + ata_eh_handle_port_suspend(ap); - /* Exception might have happened after ->error_handler - * recovered the port but before this point. Repeat - * EH in such case. - */ - spin_lock_irqsave(ap->lock, flags); + /* + * Exception might have happened after ->error_handler recovered the + * port but before this point. Repeat EH in such case. 
+ */ + spin_lock_irqsave(ap->lock, flags); - if (ap->pflags & ATA_PFLAG_EH_PENDING) { - if (--ap->eh_tries) { - spin_unlock_irqrestore(ap->lock, flags); - goto repeat; - } - ata_port_err(ap, - "EH pending after %d tries, giving up\n", - ATA_EH_MAX_TRIES); - ap->pflags &= ~ATA_PFLAG_EH_PENDING; + if (ap->pflags & ATA_PFLAG_EH_PENDING) { + if (--ap->eh_tries) { + spin_unlock_irqrestore(ap->lock, flags); + goto repeat; } + ata_port_err(ap, + "EH pending after %d tries, giving up\n", + ATA_EH_MAX_TRIES); + ap->pflags &= ~ATA_PFLAG_EH_PENDING; + } - /* this run is complete, make sure EH info is clear */ - ata_for_each_link(link, ap, HOST_FIRST) - memset(&link->eh_info, 0, sizeof(link->eh_info)); + /* this run is complete, make sure EH info is clear */ + ata_for_each_link(link, ap, HOST_FIRST) + memset(&link->eh_info, 0, sizeof(link->eh_info)); - /* end eh (clear host_eh_scheduled) while holding - * ap->lock such that if exception occurs after this - * point but before EH completion, SCSI midlayer will - * re-initiate EH. - */ - ap->ops->end_eh(ap); + /* + * end eh (clear host_eh_scheduled) while holding ap->lock such that if + * exception occurs after this point but before EH completion, SCSI + * midlayer will re-initiate EH. 
+ */ + ap->ops->end_eh(ap); - spin_unlock_irqrestore(ap->lock, flags); - ata_eh_release(ap); - } else { - WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); - ap->ops->eng_timeout(ap); - } + spin_unlock_irqrestore(ap->lock, flags); + ata_eh_release(ap); scsi_eh_flush_done_q(&ap->eh_done_q); @@ -912,8 +902,6 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - WARN_ON(!ap->ops->error_handler); - qc->flags |= ATA_QCFLAG_EH; ata_eh_set_pending(ap, 1); @@ -934,8 +922,6 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) */ void ata_std_sched_eh(struct ata_port *ap) { - WARN_ON(!ap->ops->error_handler); - if (ap->pflags & ATA_PFLAG_INITIALIZING) return; @@ -989,8 +975,6 @@ static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) struct ata_queued_cmd *qc; int tag, nr_aborted = 0; - WARN_ON(!ap->ops->error_handler); - /* we're gonna abort all commands, no need for fast drain */ ata_eh_set_pending(ap, 0); @@ -1065,8 +1049,6 @@ EXPORT_SYMBOL_GPL(ata_port_abort); */ static void __ata_port_freeze(struct ata_port *ap) { - WARN_ON(!ap->ops->error_handler); - if (ap->ops->freeze) ap->ops->freeze(ap); @@ -1091,8 +1073,6 @@ static void __ata_port_freeze(struct ata_port *ap) */ int ata_port_freeze(struct ata_port *ap) { - WARN_ON(!ap->ops->error_handler); - __ata_port_freeze(ap); return ata_port_abort(ap); @@ -1112,9 +1092,6 @@ void ata_eh_freeze_port(struct ata_port *ap) { unsigned long flags; - if (!ap->ops->error_handler) - return; - spin_lock_irqsave(ap->lock, flags); __ata_port_freeze(ap); spin_unlock_irqrestore(ap->lock, flags); @@ -1134,9 +1111,6 @@ void ata_eh_thaw_port(struct ata_port *ap) { unsigned long flags; - if (!ap->ops->error_handler) - return; - spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~ATA_PFLAG_FROZEN; diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 5d393432fa06..cf401a54c9e1 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1158,12 +1158,7 @@ 
EXPORT_SYMBOL_GPL(ata_sas_port_alloc); */ int ata_sas_port_start(struct ata_port *ap) { - /* - * the port is marked as frozen at allocation time, but if we don't - * have new eh, we won't thaw it - */ - if (!ap->ops->error_handler) - ap->pflags &= ~ATA_PFLAG_FROZEN; + /* the port is marked as frozen at allocation time */ return 0; } EXPORT_SYMBOL_GPL(ata_sas_port_start); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index f39d603f05bc..c3f745a4e5ea 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -709,47 +709,6 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc) qc->nbytes = scsi_bufflen(scmd) + qc->extrabytes; } -/** - * ata_dump_status - user friendly display of error info - * @ap: the port in question - * @tf: ptr to filled out taskfile - * - * Decode and dump the ATA error/status registers for the user so - * that they have some idea what really happened at the non - * make-believe layer. - * - * LOCKING: - * inherited from caller - */ -static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf) -{ - u8 stat = tf->status, err = tf->error; - - if (stat & ATA_BUSY) { - ata_port_warn(ap, "status=0x%02x {Busy} ", stat); - } else { - ata_port_warn(ap, "status=0x%02x { %s%s%s%s%s%s%s} ", stat, - stat & ATA_DRDY ? "DriveReady " : "", - stat & ATA_DF ? "DeviceFault " : "", - stat & ATA_DSC ? "SeekComplete " : "", - stat & ATA_DRQ ? "DataRequest " : "", - stat & ATA_CORR ? "CorrectedError " : "", - stat & ATA_SENSE ? "Sense " : "", - stat & ATA_ERR ? "Error " : ""); - if (err) - ata_port_warn(ap, "error=0x%02x {%s%s%s%s%s%s", err, - err & ATA_ABORTED ? - "DriveStatusError " : "", - err & ATA_ICRC ? - (err & ATA_ABORTED ? - "BadCRC " : "Sector ") : "", - err & ATA_UNC ? "UncorrectableError " : "", - err & ATA_IDNF ? "SectorIdNotFound " : "", - err & ATA_TRK0NF ? "TrackZeroNotFound " : "", - err & ATA_AMNF ? 
"AddrMarkNotFound " : ""); - } -} - /** * ata_to_sense_error - convert ATA error to SCSI error * @id: ATA device number @@ -758,7 +717,6 @@ static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf) * @sk: the sense key we'll fill out * @asc: the additional sense code we'll fill out * @ascq: the additional sense code qualifier we'll fill out - * @verbose: be verbose * * Converts an ATA error into a SCSI error. Fill out pointers to * SK, ASC, and ASCQ bytes for later use in fixed or descriptor @@ -768,7 +726,7 @@ static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf) * spin_lock_irqsave(host lock) */ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk, - u8 *asc, u8 *ascq, int verbose) + u8 *asc, u8 *ascq) { int i; @@ -847,7 +805,7 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk, *sk = sense_table[i][1]; *asc = sense_table[i][2]; *ascq = sense_table[i][3]; - goto translate_done; + return; } } } @@ -862,7 +820,7 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk, *sk = stat_table[i][1]; *asc = stat_table[i][2]; *ascq = stat_table[i][3]; - goto translate_done; + return; } } @@ -873,12 +831,6 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk, *sk = ABORTED_COMMAND; *asc = 0x00; *ascq = 0x00; - - translate_done: - if (verbose) - pr_err("ata%u: translated ATA stat/err 0x%02x/%02x to SCSI SK/ASC/ASCQ 0x%x/%02x/%02x\n", - id, drv_stat, drv_err, *sk, *asc, *ascq); - return; } /* @@ -904,7 +856,6 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) struct ata_taskfile *tf = &qc->result_tf; unsigned char *sb = cmd->sense_buffer; unsigned char *desc = sb + 8; - int verbose = qc->ap->ops->error_handler == NULL; u8 sense_key, asc, ascq; memset(sb, 0, SCSI_SENSE_BUFFERSIZE); @@ -916,7 +867,7 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) if (qc->err_mask || tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | 
ATA_DRQ)) { ata_to_sense_error(qc->ap->print_id, tf->status, tf->error, - &sense_key, &asc, &ascq, verbose); + &sense_key, &asc, &ascq); ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq); } else { /* @@ -999,7 +950,6 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) struct scsi_cmnd *cmd = qc->scsicmd; struct ata_taskfile *tf = &qc->result_tf; unsigned char *sb = cmd->sense_buffer; - int verbose = qc->ap->ops->error_handler == NULL; u64 block; u8 sense_key, asc, ascq; @@ -1017,7 +967,7 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) if (qc->err_mask || tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) { ata_to_sense_error(qc->ap->print_id, tf->status, tf->error, - &sense_key, &asc, &ascq, verbose); + &sense_key, &asc, &ascq); ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq); } else { /* Could not decode error */ @@ -1179,9 +1129,6 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev) unsigned long flags; struct ata_device *dev; - if (!ap->ops->error_handler) - return; - spin_lock_irqsave(ap->lock, flags); dev = __ata_scsi_find_dev(ap, sdev); if (dev && dev->sdev) { @@ -1668,7 +1615,6 @@ static void ata_qc_done(struct ata_queued_cmd *qc) static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) { - struct ata_port *ap = qc->ap; struct scsi_cmnd *cmd = qc->scsicmd; u8 *cdb = cmd->cmnd; int need_sense = (qc->err_mask != 0) && @@ -1692,9 +1638,6 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) /* Keep the SCSI ML and status byte, clear host byte. 
*/ cmd->result &= 0x0000ffff; - if (need_sense && !ap->ops->error_handler) - ata_dump_status(ap, &qc->result_tf); - ata_qc_done(qc); } @@ -2601,71 +2544,6 @@ static unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf) return 0; } -static void atapi_sense_complete(struct ata_queued_cmd *qc) -{ - if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0)) { - /* FIXME: not quite right; we don't want the - * translation of taskfile registers into - * a sense descriptors, since that's only - * correct for ATA, not ATAPI - */ - ata_gen_passthru_sense(qc); - } - - ata_qc_done(qc); -} - -/* is it pointless to prefer PIO for "safety reasons"? */ -static inline int ata_pio_use_silly(struct ata_port *ap) -{ - return (ap->flags & ATA_FLAG_PIO_DMA); -} - -static void atapi_request_sense(struct ata_queued_cmd *qc) -{ - struct ata_port *ap = qc->ap; - struct scsi_cmnd *cmd = qc->scsicmd; - - memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); - -#ifdef CONFIG_ATA_SFF - if (ap->ops->sff_tf_read) - ap->ops->sff_tf_read(ap, &qc->tf); -#endif - - /* fill these in, for the case where they are -not- overwritten */ - cmd->sense_buffer[0] = 0x70; - cmd->sense_buffer[2] = qc->tf.error >> 4; - - ata_qc_reinit(qc); - - /* setup sg table and init transfer direction */ - sg_init_one(&qc->sgent, cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); - ata_sg_init(qc, &qc->sgent, 1); - qc->dma_dir = DMA_FROM_DEVICE; - - memset(&qc->cdb, 0, qc->dev->cdb_len); - qc->cdb[0] = REQUEST_SENSE; - qc->cdb[4] = SCSI_SENSE_BUFFERSIZE; - - qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; - qc->tf.command = ATA_CMD_PACKET; - - if (ata_pio_use_silly(ap)) { - qc->tf.protocol = ATAPI_PROT_DMA; - qc->tf.feature |= ATAPI_PKT_DMA; - } else { - qc->tf.protocol = ATAPI_PROT_PIO; - qc->tf.lbam = SCSI_SENSE_BUFFERSIZE; - qc->tf.lbah = 0; - } - qc->nbytes = SCSI_SENSE_BUFFERSIZE; - - qc->complete_fn = atapi_sense_complete; - - ata_qc_issue(qc); -} - /* * ATAPI devices typically report zero for their SCSI 
version, and sometimes * deviate from the spec WRT response data format. If SCSI version is @@ -2691,9 +2569,8 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc) struct scsi_cmnd *cmd = qc->scsicmd; unsigned int err_mask = qc->err_mask; - /* handle completion from new EH */ - if (unlikely(qc->ap->ops->error_handler && - (err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID))) { + /* handle completion from EH */ + if (unlikely(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID)) { if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) { /* FIXME: not quite right; we don't want the @@ -2725,23 +2602,10 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc) return; } - /* successful completion or old EH failure path */ - if (unlikely(err_mask & AC_ERR_DEV)) { - cmd->result = SAM_STAT_CHECK_CONDITION; - atapi_request_sense(qc); - return; - } else if (unlikely(err_mask)) { - /* FIXME: not quite right; we don't want the - * translation of taskfile registers into - * a sense descriptors, since that's only - * correct for ATA, not ATAPI - */ - ata_gen_passthru_sense(qc); - } else { - if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0) - atapi_fixup_inquiry(cmd); - cmd->result = SAM_STAT_GOOD; - } + /* successful completion path */ + if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0) + atapi_fixup_inquiry(cmd); + cmd->result = SAM_STAT_GOOD; ata_qc_done(qc); } @@ -4790,9 +4654,6 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, unsigned long flags; int devno, rc = 0; - if (!ap->ops->error_handler) - return -EOPNOTSUPP; - if (lun != SCAN_WILD_CARD && lun) return -EINVAL; diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index ac55dfc2d85f..8fcc622fcb3d 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -883,31 +883,21 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) { struct ata_port *ap = qc->ap; - if (ap->ops->error_handler) { - if (in_wq) { - /* EH might have kicked in while host 
lock is - * released. - */ - qc = ata_qc_from_tag(ap, qc->tag); - if (qc) { - if (likely(!(qc->err_mask & AC_ERR_HSM))) { - ata_sff_irq_on(ap); - ata_qc_complete(qc); - } else - ata_port_freeze(ap); - } - } else { - if (likely(!(qc->err_mask & AC_ERR_HSM))) + if (in_wq) { + /* EH might have kicked in while host lock is released. */ + qc = ata_qc_from_tag(ap, qc->tag); + if (qc) { + if (likely(!(qc->err_mask & AC_ERR_HSM))) { + ata_sff_irq_on(ap); ata_qc_complete(qc); - else + } else ata_port_freeze(ap); } } else { - if (in_wq) { - ata_sff_irq_on(ap); - ata_qc_complete(qc); - } else + if (likely(!(qc->err_mask & AC_ERR_HSM))) ata_qc_complete(qc); + else + ata_port_freeze(ap); } } diff --git a/include/linux/libata.h b/include/linux/libata.h index 8d510fb00591..d61e5465076e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1785,7 +1785,7 @@ static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap, { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - if (unlikely(!qc) || !ap->ops->error_handler) + if (unlikely(!qc)) return qc; if ((qc->flags & (ATA_QCFLAG_ACTIVE | -- cgit v1.2.3 From 43aa43351bb551a7732c1b9f6e8ebb9a6f30b063 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:13 +0200 Subject: ata,scsi: remove ata_sas_port_{start,stop} callbacks Callbacks are empty now, so remove them. Also, remove the call to ap->ops->port_start() in ata_sas_port_init(), as this would otherwise cause a NULL pointer dereference, now when the callback is gone. Signed-off-by: Hannes Reinecke [niklas: remove the call to ap->ops->port_start() in ata_sas_port_init()] Signed-off-by: Niklas Cassel Reviewed-by: Jason Yan Reviewed-by: John Garry Reviewed-by: Martin K. 
Petersen Signed-off-by: Damien Le Moal --- drivers/ata/libata-sata.c | 38 -------------------------------------- drivers/scsi/libsas/sas_ata.c | 2 -- include/linux/libata.h | 2 -- 3 files changed, 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index cf401a54c9e1..ce392b5b5930 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1144,40 +1144,6 @@ struct ata_port *ata_sas_port_alloc(struct ata_host *host, } EXPORT_SYMBOL_GPL(ata_sas_port_alloc); -/** - * ata_sas_port_start - Set port up for dma. - * @ap: Port to initialize - * - * Called just after data structures for each port are - * initialized. - * - * May be used as the port_start() entry in ata_port_operations. - * - * LOCKING: - * Inherited from caller. - */ -int ata_sas_port_start(struct ata_port *ap) -{ - /* the port is marked as frozen at allocation time */ - return 0; -} -EXPORT_SYMBOL_GPL(ata_sas_port_start); - -/** - * ata_sas_port_stop - Undo ata_sas_port_start() - * @ap: Port to shut down - * - * May be used as the port_stop() entry in ata_port_operations. - * - * LOCKING: - * Inherited from caller. 
- */ - -void ata_sas_port_stop(struct ata_port *ap) -{ -} -EXPORT_SYMBOL_GPL(ata_sas_port_stop); - /** * ata_sas_async_probe - simply schedule probing and return * @ap: Port to probe @@ -1211,10 +1177,6 @@ EXPORT_SYMBOL_GPL(ata_sas_sync_probe); int ata_sas_port_init(struct ata_port *ap) { - int rc = ap->ops->port_start(ap); - - if (rc) - return rc; ap->print_id = atomic_inc_return(&ata_print_id); return 0; } diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 77714a495cbb..7ead1f1be97f 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -565,8 +565,6 @@ static struct ata_port_operations sas_sata_ops = { .qc_prep = ata_noop_qc_prep, .qc_issue = sas_ata_qc_issue, .qc_fill_rtf = sas_ata_qc_fill_rtf, - .port_start = ata_sas_port_start, - .port_stop = ata_sas_port_stop, .set_dmamode = sas_ata_set_dmamode, .sched_eh = sas_ata_sched_eh, .end_eh = sas_ata_end_eh, diff --git a/include/linux/libata.h b/include/linux/libata.h index d61e5465076e..f8f8b558a8be 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1244,10 +1244,8 @@ extern struct ata_port *ata_sas_port_alloc(struct ata_host *, extern void ata_sas_async_probe(struct ata_port *ap); extern int ata_sas_sync_probe(struct ata_port *ap); extern int ata_sas_port_init(struct ata_port *); -extern int ata_sas_port_start(struct ata_port *ap); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); -extern void ata_sas_port_stop(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, -- cgit v1.2.3 From 6c2fe21e08c28bbdb53ada668edea7df1aa9fb1e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:14 +0200 Subject: ata,scsi: remove ata_sas_port_destroy() Is now a wrapper around kfree(), so 
call it directly. Signed-off-by: Hannes Reinecke Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Jason Yan Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- drivers/ata/libata-sata.c | 14 -------------- drivers/scsi/libsas/sas_ata.c | 2 +- drivers/scsi/libsas/sas_discover.c | 2 +- include/linux/libata.h | 1 - 4 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index ce392b5b5930..c149a46fadaf 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1194,20 +1194,6 @@ void ata_sas_tport_delete(struct ata_port *ap) } EXPORT_SYMBOL_GPL(ata_sas_tport_delete); -/** - * ata_sas_port_destroy - Destroy a SATA port allocated by ata_sas_port_alloc - * @ap: SATA port to destroy - * - */ - -void ata_sas_port_destroy(struct ata_port *ap) -{ - if (ap->ops->port_stop) - ap->ops->port_stop(ap); - kfree(ap); -} -EXPORT_SYMBOL_GPL(ata_sas_port_destroy); - /** * ata_sas_slave_configure - Default slave_config routine for libata devices * @sdev: SCSI device to configure diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 7ead1f1be97f..a2eb9a2191c0 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -619,7 +619,7 @@ int sas_ata_init(struct domain_device *found_dev) return 0; destroy_port: - ata_sas_port_destroy(ap); + kfree(ap); free_host: ata_host_put(ata_host); return rc; diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 8c6afe724944..07e18cdb85c7 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -301,7 +301,7 @@ void sas_free_device(struct kref *kref) if (dev_is_sata(dev) && dev->sata_dev.ap) { ata_sas_tport_delete(dev->sata_dev.ap); - ata_sas_port_destroy(dev->sata_dev.ap); + kfree(dev->sata_dev.ap); ata_host_put(dev->sata_dev.ata_host); dev->sata_dev.ata_host = NULL; dev->sata_dev.ap = NULL; diff 
--git a/include/linux/libata.h b/include/linux/libata.h index f8f8b558a8be..1d1fd16a1492 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1238,7 +1238,6 @@ extern int sata_link_debounce(struct ata_link *link, extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); -extern void ata_sas_port_destroy(struct ata_port *); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_sas_async_probe(struct ata_port *ap); -- cgit v1.2.3 From 8ac161ea2b3709b789c192de7da31a58aec9d7ee Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:15 +0200 Subject: ata: libata-sata: remove ata_sas_sync_probe() Unused. Signed-off-by: Hannes Reinecke Signed-off-by: Niklas Cassel Reviewed-by: Jason Yan Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- drivers/ata/libata-sata.c | 7 ------- include/linux/libata.h | 1 - 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index c149a46fadaf..04ab2bedcaa2 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1157,13 +1157,6 @@ void ata_sas_async_probe(struct ata_port *ap) } EXPORT_SYMBOL_GPL(ata_sas_async_probe); -int ata_sas_sync_probe(struct ata_port *ap) -{ - return ata_port_probe(ap); -} -EXPORT_SYMBOL_GPL(ata_sas_sync_probe); - - /** * ata_sas_port_init - Initialize a SATA device * @ap: SATA port to initialize diff --git a/include/linux/libata.h b/include/linux/libata.h index 1d1fd16a1492..d08dde8c8ad2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1241,7 +1241,6 @@ extern int ata_slave_link_init(struct ata_port *ap); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_sas_async_probe(struct ata_port *ap); -extern int 
ata_sas_sync_probe(struct ata_port *ap); extern int ata_sas_port_init(struct ata_port *); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); -- cgit v1.2.3 From a76f1b637ce92c2b8d5da912826079010b11f138 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 31 Jul 2023 16:34:17 +0200 Subject: ata,scsi: cleanup __ata_port_probe() Rename __ata_port_probe() to ata_port_probe() and drop the wrapper ata_sas_async_probe(). Signed-off-by: Hannes Reinecke Signed-off-by: Niklas Cassel Reviewed-by: Jason Yan Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 5 +++-- drivers/ata/libata-sata.c | 13 ------------- drivers/ata/libata.h | 2 -- drivers/scsi/hisi_sas/hisi_sas_main.c | 2 +- drivers/scsi/libsas/sas_ata.c | 2 +- include/linux/libata.h | 2 +- 6 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c6e624d27f46..fdd4804d0bf2 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5865,7 +5865,7 @@ void ata_host_init(struct ata_host *host, struct device *dev, } EXPORT_SYMBOL_GPL(ata_host_init); -void __ata_port_probe(struct ata_port *ap) +void ata_port_probe(struct ata_port *ap) { struct ata_eh_info *ehi = &ap->link.eh_info; unsigned long flags; @@ -5883,6 +5883,7 @@ void __ata_port_probe(struct ata_port *ap) spin_unlock_irqrestore(ap->lock, flags); } +EXPORT_SYMBOL_GPL(ata_port_probe); static void async_port_probe(void *data, async_cookie_t cookie) { @@ -5898,7 +5899,7 @@ static void async_port_probe(void *data, async_cookie_t cookie) if (!(ap->host->flags & ATA_HOST_PARALLEL_SCAN) && ap->port_no != 0) async_synchronize_cookie(cookie); - __ata_port_probe(ap); + ata_port_probe(ap); ata_port_wait_eh(ap); /* in order to keep device order, we need to synchronize at this point */ diff --git a/drivers/ata/libata-sata.c 
b/drivers/ata/libata-sata.c index 04ab2bedcaa2..eb81641d383a 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1144,19 +1144,6 @@ struct ata_port *ata_sas_port_alloc(struct ata_host *host, } EXPORT_SYMBOL_GPL(ata_sas_port_alloc); -/** - * ata_sas_async_probe - simply schedule probing and return - * @ap: Port to probe - * - * For batch scheduling of probe for sas attached ata devices, assumes - * the port has already been through ata_sas_port_init() - */ -void ata_sas_async_probe(struct ata_port *ap) -{ - __ata_port_probe(ap); -} -EXPORT_SYMBOL_GPL(ata_sas_async_probe); - /** * ata_sas_port_init - Initialize a SATA device * @ap: SATA port to initialize diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index cf993885d2b2..1ec9b4427b84 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -78,8 +78,6 @@ extern int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg); extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg); extern struct ata_port *ata_port_alloc(struct ata_host *host); extern const char *sata_spd_string(unsigned int spd); -extern int ata_port_probe(struct ata_port *ap); -extern void __ata_port_probe(struct ata_port *ap); extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log, u8 page, void *buf, unsigned int sectors); diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 8f22ece957bd..b2f07c6f30e7 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -787,7 +787,7 @@ static int hisi_sas_init_device(struct domain_device *device) * However we don't need to issue a hard reset here for these * reasons: * a. When probing the device, libsas/libata already issues a - * hard reset in sas_probe_sata() -> ata_sas_async_probe(). + * hard reset in sas_probe_sata() -> ata_port_probe(). 
* Note that in hisi_sas_debug_I_T_nexus_reset() we take care * to issue a hard reset by checking the dev status (== INIT). * b. When resetting the controller, this is simply unnecessary. diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index a2eb9a2191c0..d6bb37b3974a 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -653,7 +653,7 @@ void sas_probe_sata(struct asd_sas_port *port) if (!dev_is_sata(dev)) continue; - ata_sas_async_probe(dev->sata_dev.ap); + ata_port_probe(dev->sata_dev.ap); } mutex_unlock(&port->ha->disco_mutex); diff --git a/include/linux/libata.h b/include/linux/libata.h index d08dde8c8ad2..7468a330fc77 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1240,7 +1240,7 @@ extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, extern int ata_slave_link_init(struct ata_port *ap); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); -extern void ata_sas_async_probe(struct ata_port *ap); +extern void ata_port_probe(struct ata_port *ap); extern int ata_sas_port_init(struct ata_port *); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); -- cgit v1.2.3 From 541528170a5cb1342c378dfd46b04dfe024dbc7a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 31 Jul 2023 16:34:18 +0200 Subject: ata,scsi: remove ata_sas_port_init() ata_sas_port_init() now only contains a single initialization. Move this single initialization to ata_sas_port_alloc(), since: 1) ata_sas_port_alloc() already initializes some of the struct members. 2) ata_sas_port_alloc() is only used by libsas. Suggested-by: John Garry Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Martin K. 
Petersen Signed-off-by: Damien Le Moal --- drivers/ata/libata-sata.c | 19 +------------------ drivers/scsi/libsas/sas_ata.c | 3 --- include/linux/libata.h | 1 - 3 files changed, 1 insertion(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index eb81641d383a..5d31c08be013 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1139,29 +1139,12 @@ struct ata_port *ata_sas_port_alloc(struct ata_host *host, ap->flags |= port_info->flags; ap->ops = port_info->port_ops; ap->cbl = ATA_CBL_SATA; + ap->print_id = atomic_inc_return(&ata_print_id); return ap; } EXPORT_SYMBOL_GPL(ata_sas_port_alloc); -/** - * ata_sas_port_init - Initialize a SATA device - * @ap: SATA port to initialize - * - * LOCKING: - * PCI/etc. bus probe sem. - * - * RETURNS: - * Zero on success, non-zero on error. - */ - -int ata_sas_port_init(struct ata_port *ap) -{ - ap->print_id = atomic_inc_return(&ata_print_id); - return 0; -} -EXPORT_SYMBOL_GPL(ata_sas_port_init); - int ata_sas_tport_add(struct device *parent, struct ata_port *ap) { return ata_tport_add(parent, ap); diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index d6bb37b3974a..cd16a1ac379d 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -605,9 +605,6 @@ int sas_ata_init(struct domain_device *found_dev) ap->private_data = found_dev; ap->cbl = ATA_CBL_SATA; ap->scsi_host = shost; - rc = ata_sas_port_init(ap); - if (rc) - goto destroy_port; rc = ata_sas_tport_add(ata_host->dev, ap); if (rc) diff --git a/include/linux/libata.h b/include/linux/libata.h index 7468a330fc77..0980992c54c2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1241,7 +1241,6 @@ extern int ata_slave_link_init(struct ata_port *ap); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_port_probe(struct ata_port *ap); -extern int 
ata_sas_port_init(struct ata_port *); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); -- cgit v1.2.3 From 89329c7384ef56c407269157e30e781f55c3c4d2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 31 Jul 2023 16:34:20 +0200 Subject: ata: libata-core: remove ata_bus_probe() Remove ata_bus_probe() as it is unused. Also, remove references to ata_bus_probe and port_disable in Documentation/driver-api/libata.rst, as neither exist anymore. Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Jason Yan Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal --- Documentation/driver-api/libata.rst | 16 ----- drivers/ata/libata-core.c | 138 ------------------------------------ drivers/ata/libata.h | 1 - include/linux/libata.h | 1 - 4 files changed, 156 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst index 311af516a3fd..eecb8b81e185 100644 --- a/Documentation/driver-api/libata.rst +++ b/Documentation/driver-api/libata.rst @@ -32,22 +32,6 @@ register blocks. :c:type:`struct ata_port_operations ` ---------------------------------------------------------- -Disable ATA port -~~~~~~~~~~~~~~~~ - -:: - - void (*port_disable) (struct ata_port *); - - -Called from :c:func:`ata_bus_probe` error path, as well as when unregistering -from the SCSI module (rmmod, hot unplug). This function should do -whatever needs to be done to take the port out of use. In most cases, -:c:func:`ata_port_disable` can be used as this hook. - -Called from :c:func:`ata_bus_probe` on a failed probe. Called from -:c:func:`ata_scsi_release`. 
- Post-IDENTIFY device configuration ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fdd4804d0bf2..988d1fc451be 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3057,144 +3057,6 @@ int ata_cable_sata(struct ata_port *ap) } EXPORT_SYMBOL_GPL(ata_cable_sata); -/** - * ata_bus_probe - Reset and probe ATA bus - * @ap: Bus to probe - * - * Master ATA bus probing function. Initiates a hardware-dependent - * bus reset, then attempts to identify any devices found on - * the bus. - * - * LOCKING: - * PCI/etc. bus probe sem. - * - * RETURNS: - * Zero on success, negative errno otherwise. - */ - -int ata_bus_probe(struct ata_port *ap) -{ - unsigned int classes[ATA_MAX_DEVICES]; - int tries[ATA_MAX_DEVICES]; - int rc; - struct ata_device *dev; - - ata_for_each_dev(dev, &ap->link, ALL) - tries[dev->devno] = ATA_PROBE_MAX_TRIES; - - retry: - ata_for_each_dev(dev, &ap->link, ALL) { - /* If we issue an SRST then an ATA drive (not ATAPI) - * may change configuration and be in PIO0 timing. If - * we do a hard reset (or are coming from power on) - * this is true for ATA or ATAPI. Until we've set a - * suitable controller mode we should not touch the - * bus as we may be talking too fast. - */ - dev->pio_mode = XFER_PIO_0; - dev->dma_mode = 0xff; - - /* If the controller has a pio mode setup function - * then use it to set the chipset to rights. Don't - * touch the DMA setup as that will be dealt with when - * configuring devices. - */ - if (ap->ops->set_piomode) - ap->ops->set_piomode(ap, dev); - } - - /* reset and determine device classes */ - ap->ops->phy_reset(ap); - - ata_for_each_dev(dev, &ap->link, ALL) { - if (dev->class != ATA_DEV_UNKNOWN) - classes[dev->devno] = dev->class; - else - classes[dev->devno] = ATA_DEV_NONE; - - dev->class = ATA_DEV_UNKNOWN; - } - - /* read IDENTIFY page and configure devices. 
We have to do the identify - specific sequence bass-ackwards so that PDIAG- is released by - the slave device */ - - ata_for_each_dev(dev, &ap->link, ALL_REVERSE) { - if (tries[dev->devno]) - dev->class = classes[dev->devno]; - - if (!ata_dev_enabled(dev)) - continue; - - rc = ata_dev_read_id(dev, &dev->class, ATA_READID_POSTRESET, - dev->id); - if (rc) - goto fail; - } - - /* Now ask for the cable type as PDIAG- should have been released */ - if (ap->ops->cable_detect) - ap->cbl = ap->ops->cable_detect(ap); - - /* We may have SATA bridge glue hiding here irrespective of - * the reported cable types and sensed types. When SATA - * drives indicate we have a bridge, we don't know which end - * of the link the bridge is which is a problem. - */ - ata_for_each_dev(dev, &ap->link, ENABLED) - if (ata_id_is_sata(dev->id)) - ap->cbl = ATA_CBL_SATA; - - /* After the identify sequence we can now set up the devices. We do - this in the normal order so that the user doesn't get confused */ - - ata_for_each_dev(dev, &ap->link, ENABLED) { - ap->link.eh_context.i.flags |= ATA_EHI_PRINTINFO; - rc = ata_dev_configure(dev); - ap->link.eh_context.i.flags &= ~ATA_EHI_PRINTINFO; - if (rc) - goto fail; - } - - /* configure transfer mode */ - rc = ata_set_mode(&ap->link, &dev); - if (rc) - goto fail; - - ata_for_each_dev(dev, &ap->link, ENABLED) - return 0; - - return -ENODEV; - - fail: - tries[dev->devno]--; - - switch (rc) { - case -EINVAL: - /* eeek, something went very wrong, give up */ - tries[dev->devno] = 0; - break; - - case -ENODEV: - /* give it just one more chance */ - tries[dev->devno] = min(tries[dev->devno], 1); - fallthrough; - case -EIO: - if (tries[dev->devno] == 1) { - /* This is the last chance, better to slow - * down than lose it. 
- */ - sata_down_spd_limit(&ap->link, 0); - ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); - } - } - - if (!tries[dev->devno]) - ata_dev_disable(dev); - - goto retry; -} - /** * sata_print_link_status - Print SATA link status * @link: SATA link to printk link status about diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 1ec9b4427b84..6e7d352803bd 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -122,7 +122,6 @@ extern void ata_scsi_media_change_notify(struct ata_device *dev); extern void ata_scsi_hotplug(struct work_struct *work); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); extern void ata_scsi_dev_rescan(struct work_struct *work); -extern int ata_bus_probe(struct ata_port *ap); extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, unsigned int id, u64 lun); void ata_scsi_sdev_config(struct scsi_device *sdev); diff --git a/include/linux/libata.h b/include/linux/libata.h index 0980992c54c2..e176d832467d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -344,7 +344,6 @@ enum { ATA_LINK_RESUME_TRIES = 5, /* how hard are we gonna try to probe/recover devices */ - ATA_PROBE_MAX_TRIES = 3, ATA_EH_DEV_TRIES = 3, ATA_EH_PMP_TRIES = 5, ATA_EH_PMP_LINK_TRIES = 3, -- cgit v1.2.3 From 6b4f165e0858016f7bb5f360966882a819519f07 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 31 Jul 2023 16:34:21 +0200 Subject: ata: libata: remove deprecated EH callbacks Now that all libata drivers have migrated to use the error_handler callback, remove the deprecated phy_reset and eng_timeout callbacks. Also remove references to non-existent functions sata_phy_reset and ata_qc_timeout from Documentation/driver-api/libata.rst. Signed-off-by: Niklas Cassel Reviewed-by: John Garry Reviewed-by: Sergey Shtylyov Reviewed-by: Jason Yan Reviewed-by: Martin K. 
Petersen Signed-off-by: Damien Le Moal --- Documentation/driver-api/libata.rst | 22 ++++++---------------- drivers/ata/pata_sl82c105.c | 3 +-- include/linux/libata.h | 6 ------ 3 files changed, 7 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst index eecb8b81e185..5da27a749246 100644 --- a/Documentation/driver-api/libata.rst +++ b/Documentation/driver-api/libata.rst @@ -256,14 +256,6 @@ advanced drivers implement their own ``->qc_issue``. Exception and probe handling (EH) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -:: - - void (*eng_timeout) (struct ata_port *ap); - void (*phy_reset) (struct ata_port *ap); - - -Deprecated. Use ``->error_handler()`` instead. - :: void (*freeze) (struct ata_port *ap); @@ -348,8 +340,7 @@ SATA phy read/write u32 val); -Read and write standard SATA phy registers. Currently only used if -``->phy_reset`` hook called the :c:func:`sata_phy_reset` helper function. +Read and write standard SATA phy registers. sc_reg is one of SCR_STATUS, SCR_CONTROL, SCR_ERROR, or SCR_ACTIVE. Init and shutdown @@ -520,13 +511,12 @@ to return without deallocating the qc. This leads us to :c:func:`ata_scsi_error` is the current ``transportt->eh_strategy_handler()`` for libata. As discussed above, this will be entered in two cases - -timeout and ATAPI error completion. This function calls low level libata -driver's :c:func:`eng_timeout` callback, the standard callback for which is -:c:func:`ata_eng_timeout`. It checks if a qc is active and calls -:c:func:`ata_qc_timeout` on the qc if so. Actual error handling occurs in -:c:func:`ata_qc_timeout`. +timeout and ATAPI error completion. This function will check if a qc is active +and has not failed yet. Such a qc will be marked with AC_ERR_TIMEOUT such that +EH will know to handle it later. Then it calls low level libata driver's +:c:func:`error_handler` callback. 
-If EH is invoked for timeout, :c:func:`ata_qc_timeout` stops BMDMA and +When the :c:func:`error_handler` callback is invoked it stops BMDMA and completes the qc. Note that as we're currently in EH, we cannot call scsi_done. As described in SCSI EH doc, a recovered scmd should be either retried with :c:func:`scsi_queue_insert` or finished with diff --git a/drivers/ata/pata_sl82c105.c b/drivers/ata/pata_sl82c105.c index 3b62ea482f1a..93882e976ede 100644 --- a/drivers/ata/pata_sl82c105.c +++ b/drivers/ata/pata_sl82c105.c @@ -180,8 +180,7 @@ static void sl82c105_bmdma_start(struct ata_queued_cmd *qc) * document. * * This function is also called to turn off DMA when a timeout occurs - * during DMA operation. In both cases we need to reset the engine, - * so no actual eng_timeout handler is required. + * during DMA operation. In both cases we need to reset the engine. * * We assume bmdma_stop is always called if bmdma_start as called. If * not then we may need to wrap qc_issue. diff --git a/include/linux/libata.h b/include/linux/libata.h index e176d832467d..52d58b13e5ee 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -975,12 +975,6 @@ struct ata_port_operations { ssize_t (*transmit_led_message)(struct ata_port *ap, u32 state, ssize_t size); - /* - * Obsolete - */ - void (*phy_reset)(struct ata_port *ap); - void (*eng_timeout)(struct ata_port *ap); - /* * ->inherits must be the last field and all the preceding * fields must be pointers. 
-- cgit v1.2.3 From 54e73cd52250adeba836cd3afef3658b48ae8dc9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 1 Aug 2023 12:58:15 +0200 Subject: virtio: Remove PM #ifdef guards to fix i2c driver A cleanup in the virtio i2c caused a build failure: drivers/i2c/busses/i2c-virtio.c:270:10: error: 'struct virtio_driver' has no member named 'freeze' drivers/i2c/busses/i2c-virtio.c:271:10: error: 'struct virtio_driver' has no member named 'restore' Change the structure definition to allow this cleanup to be applied everywhere. Fixes: 73d546c76235b ("i2c: virtio: Remove #ifdef guards for PM related functions") Signed-off-by: Arnd Bergmann Reviewed-by: Paul Cercueil Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20230801105846.3708252-1-arnd@kernel.org Signed-off-by: Andi Shyti --- include/linux/virtio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index de6041deee37..7ed071b5ef07 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -184,10 +184,8 @@ struct virtio_driver { void (*scan)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev); void (*config_changed)(struct virtio_device *dev); -#ifdef CONFIG_PM int (*freeze)(struct virtio_device *dev); int (*restore)(struct virtio_device *dev); -#endif }; static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) -- cgit v1.2.3 From d890cfc25fe9421ffdff3a9ea678172addb36762 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 7 Aug 2023 09:31:06 +0200 Subject: rtc: ds2404: Convert to GPIO descriptors This converts the DS2404 to use GPIO descriptors instead of hard-coded global GPIO numbers. The platform data can be deleted because there are no in-tree users and it only contained GPIO numbers which are now passed using descriptor tables (or device tree or ACPI). 
The driver was rewritten to use a state container for the device driver state (struct ds2404 *chip) and pass that around instead of using a global singleton storage for the GPIO handles. When declaring GPIO descriptor tables or other hardware descriptions for the RTC driver, implementers should take care to flag the RESET line as active low, such as by using the GPIOD_ACTIVE_LOW flag in the descriptor table. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20230807-descriptors-rtc-v1-1-ce0f9187576e@linaro.org Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-ds2404.c | 169 +++++++++++++------------------ include/linux/platform_data/rtc-ds2404.h | 20 ---- 2 files changed, 69 insertions(+), 120 deletions(-) delete mode 100644 include/linux/platform_data/rtc-ds2404.h (limited to 'include/linux') diff --git a/drivers/rtc/rtc-ds2404.c b/drivers/rtc/rtc-ds2404.c index 0480f592307e..3231fd9f61da 100644 --- a/drivers/rtc/rtc-ds2404.c +++ b/drivers/rtc/rtc-ds2404.c @@ -7,9 +7,8 @@ #include #include #include -#include #include -#include +#include #include #include @@ -27,164 +26,140 @@ #define DS2404_CLK 1 #define DS2404_DQ 2 -struct ds2404_gpio { - const char *name; - unsigned int gpio; -}; - struct ds2404 { - struct ds2404_gpio *gpio; + struct device *dev; + struct gpio_desc *rst_gpiod; + struct gpio_desc *clk_gpiod; + struct gpio_desc *dq_gpiod; struct rtc_device *rtc; }; -static struct ds2404_gpio ds2404_gpio[] = { - { "RTC RST", 0 }, - { "RTC CLK", 0 }, - { "RTC DQ", 0 }, -}; - -static int ds2404_gpio_map(struct ds2404 *chip, struct platform_device *pdev, - struct ds2404_platform_data *pdata) +static int ds2404_gpio_map(struct ds2404 *chip, struct platform_device *pdev) { - int i, err; - - ds2404_gpio[DS2404_RST].gpio = pdata->gpio_rst; - ds2404_gpio[DS2404_CLK].gpio = pdata->gpio_clk; - ds2404_gpio[DS2404_DQ].gpio = pdata->gpio_dq; - - for (i = 0; i < ARRAY_SIZE(ds2404_gpio); i++) { - err = gpio_request(ds2404_gpio[i].gpio, ds2404_gpio[i].name); - if 
(err) { - dev_err(&pdev->dev, "error mapping gpio %s: %d\n", - ds2404_gpio[i].name, err); - goto err_request; - } - if (i != DS2404_DQ) - gpio_direction_output(ds2404_gpio[i].gpio, 1); - } + struct device *dev = &pdev->dev; - chip->gpio = ds2404_gpio; - return 0; + /* This will de-assert RESET, declare this GPIO as GPIOD_ACTIVE_LOW */ + chip->rst_gpiod = devm_gpiod_get(dev, "rst", GPIOD_OUT_LOW); + if (IS_ERR(chip->rst_gpiod)) + return PTR_ERR(chip->rst_gpiod); -err_request: - while (--i >= 0) - gpio_free(ds2404_gpio[i].gpio); - return err; -} + chip->clk_gpiod = devm_gpiod_get(dev, "clk", GPIOD_OUT_HIGH); + if (IS_ERR(chip->clk_gpiod)) + return PTR_ERR(chip->clk_gpiod); -static void ds2404_gpio_unmap(void *data) -{ - int i; + chip->dq_gpiod = devm_gpiod_get(dev, "dq", GPIOD_ASIS); + if (IS_ERR(chip->dq_gpiod)) + return PTR_ERR(chip->dq_gpiod); - for (i = 0; i < ARRAY_SIZE(ds2404_gpio); i++) - gpio_free(ds2404_gpio[i].gpio); + return 0; } -static void ds2404_reset(struct device *dev) +static void ds2404_reset(struct ds2404 *chip) { - gpio_set_value(ds2404_gpio[DS2404_RST].gpio, 0); + gpiod_set_value(chip->rst_gpiod, 1); udelay(1000); - gpio_set_value(ds2404_gpio[DS2404_RST].gpio, 1); - gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0); - gpio_direction_output(ds2404_gpio[DS2404_DQ].gpio, 0); + gpiod_set_value(chip->rst_gpiod, 0); + gpiod_set_value(chip->clk_gpiod, 0); + gpiod_direction_output(chip->dq_gpiod, 0); udelay(10); } -static void ds2404_write_byte(struct device *dev, u8 byte) +static void ds2404_write_byte(struct ds2404 *chip, u8 byte) { int i; - gpio_direction_output(ds2404_gpio[DS2404_DQ].gpio, 1); + gpiod_direction_output(chip->dq_gpiod, 1); for (i = 0; i < 8; i++) { - gpio_set_value(ds2404_gpio[DS2404_DQ].gpio, byte & (1 << i)); + gpiod_set_value(chip->dq_gpiod, byte & (1 << i)); udelay(10); - gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 1); + gpiod_set_value(chip->clk_gpiod, 1); udelay(10); - gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0); + 
gpiod_set_value(chip->clk_gpiod, 0); udelay(10); } } -static u8 ds2404_read_byte(struct device *dev) +static u8 ds2404_read_byte(struct ds2404 *chip) { int i; u8 ret = 0; - gpio_direction_input(ds2404_gpio[DS2404_DQ].gpio); + gpiod_direction_input(chip->dq_gpiod); for (i = 0; i < 8; i++) { - gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0); + gpiod_set_value(chip->clk_gpiod, 0); udelay(10); - if (gpio_get_value(ds2404_gpio[DS2404_DQ].gpio)) + if (gpiod_get_value(chip->dq_gpiod)) ret |= 1 << i; - gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 1); + gpiod_set_value(chip->clk_gpiod, 1); udelay(10); } return ret; } -static void ds2404_read_memory(struct device *dev, u16 offset, +static void ds2404_read_memory(struct ds2404 *chip, u16 offset, int length, u8 *out) { - ds2404_reset(dev); - ds2404_write_byte(dev, DS2404_READ_MEMORY_CMD); - ds2404_write_byte(dev, offset & 0xff); - ds2404_write_byte(dev, (offset >> 8) & 0xff); + ds2404_reset(chip); + ds2404_write_byte(chip, DS2404_READ_MEMORY_CMD); + ds2404_write_byte(chip, offset & 0xff); + ds2404_write_byte(chip, (offset >> 8) & 0xff); while (length--) - *out++ = ds2404_read_byte(dev); + *out++ = ds2404_read_byte(chip); } -static void ds2404_write_memory(struct device *dev, u16 offset, +static void ds2404_write_memory(struct ds2404 *chip, u16 offset, int length, u8 *out) { int i; u8 ta01, ta02, es; - ds2404_reset(dev); - ds2404_write_byte(dev, DS2404_WRITE_SCRATCHPAD_CMD); - ds2404_write_byte(dev, offset & 0xff); - ds2404_write_byte(dev, (offset >> 8) & 0xff); + ds2404_reset(chip); + ds2404_write_byte(chip, DS2404_WRITE_SCRATCHPAD_CMD); + ds2404_write_byte(chip, offset & 0xff); + ds2404_write_byte(chip, (offset >> 8) & 0xff); for (i = 0; i < length; i++) - ds2404_write_byte(dev, out[i]); + ds2404_write_byte(chip, out[i]); - ds2404_reset(dev); - ds2404_write_byte(dev, DS2404_READ_SCRATCHPAD_CMD); + ds2404_reset(chip); + ds2404_write_byte(chip, DS2404_READ_SCRATCHPAD_CMD); - ta01 = ds2404_read_byte(dev); - ta02 = 
ds2404_read_byte(dev); - es = ds2404_read_byte(dev); + ta01 = ds2404_read_byte(chip); + ta02 = ds2404_read_byte(chip); + es = ds2404_read_byte(chip); for (i = 0; i < length; i++) { - if (out[i] != ds2404_read_byte(dev)) { - dev_err(dev, "read invalid data\n"); + if (out[i] != ds2404_read_byte(chip)) { + dev_err(chip->dev, "read invalid data\n"); return; } } - ds2404_reset(dev); - ds2404_write_byte(dev, DS2404_COPY_SCRATCHPAD_CMD); - ds2404_write_byte(dev, ta01); - ds2404_write_byte(dev, ta02); - ds2404_write_byte(dev, es); + ds2404_reset(chip); + ds2404_write_byte(chip, DS2404_COPY_SCRATCHPAD_CMD); + ds2404_write_byte(chip, ta01); + ds2404_write_byte(chip, ta02); + ds2404_write_byte(chip, es); - gpio_direction_input(ds2404_gpio[DS2404_DQ].gpio); - while (gpio_get_value(ds2404_gpio[DS2404_DQ].gpio)) + while (gpiod_get_value(chip->dq_gpiod)) ; } -static void ds2404_enable_osc(struct device *dev) +static void ds2404_enable_osc(struct ds2404 *chip) { u8 in[1] = { 0x10 }; /* enable oscillator */ - ds2404_write_memory(dev, 0x201, 1, in); + + ds2404_write_memory(chip, 0x201, 1, in); } static int ds2404_read_time(struct device *dev, struct rtc_time *dt) { + struct ds2404 *chip = dev_get_drvdata(dev); unsigned long time = 0; __le32 hw_time = 0; - ds2404_read_memory(dev, 0x203, 4, (u8 *)&hw_time); + ds2404_read_memory(chip, 0x203, 4, (u8 *)&hw_time); time = le32_to_cpu(hw_time); rtc_time64_to_tm(time, dt); @@ -193,8 +168,9 @@ static int ds2404_read_time(struct device *dev, struct rtc_time *dt) static int ds2404_set_time(struct device *dev, struct rtc_time *dt) { + struct ds2404 *chip = dev_get_drvdata(dev); u32 time = cpu_to_le32(rtc_tm_to_time64(dt)); - ds2404_write_memory(dev, 0x203, 4, (u8 *)&time); + ds2404_write_memory(chip, 0x203, 4, (u8 *)&time); return 0; } @@ -205,7 +181,6 @@ static const struct rtc_class_ops ds2404_rtc_ops = { static int rtc_probe(struct platform_device *pdev) { - struct ds2404_platform_data *pdata = dev_get_platdata(&pdev->dev); struct ds2404 
*chip; int retval = -EBUSY; @@ -213,22 +188,16 @@ static int rtc_probe(struct platform_device *pdev) if (!chip) return -ENOMEM; + chip->dev = &pdev->dev; + chip->rtc = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(chip->rtc)) return PTR_ERR(chip->rtc); - retval = ds2404_gpio_map(chip, pdev, pdata); + retval = ds2404_gpio_map(chip, pdev); if (retval) return retval; - retval = devm_add_action_or_reset(&pdev->dev, ds2404_gpio_unmap, chip); - if (retval) - return retval; - - dev_info(&pdev->dev, "using GPIOs RST:%d, CLK:%d, DQ:%d\n", - chip->gpio[DS2404_RST].gpio, chip->gpio[DS2404_CLK].gpio, - chip->gpio[DS2404_DQ].gpio); - platform_set_drvdata(pdev, chip); chip->rtc->ops = &ds2404_rtc_ops; @@ -238,7 +207,7 @@ static int rtc_probe(struct platform_device *pdev) if (retval) return retval; - ds2404_enable_osc(&pdev->dev); + ds2404_enable_osc(chip); return 0; } diff --git a/include/linux/platform_data/rtc-ds2404.h b/include/linux/platform_data/rtc-ds2404.h deleted file mode 100644 index 22c53825528f..000000000000 --- a/include/linux/platform_data/rtc-ds2404.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * ds2404.h - platform data structure for the DS2404 RTC. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2012 Sven Schnelle - */ - -#ifndef __LINUX_DS2404_H -#define __LINUX_DS2404_H - -struct ds2404_platform_data { - - unsigned int gpio_rst; - unsigned int gpio_clk; - unsigned int gpio_dq; -}; -#endif -- cgit v1.2.3 From afb48153220d35f330d0d979792920a31f7d9a81 Mon Sep 17 00:00:00 2001 From: Jean-Jacques Hiblot Date: Fri, 28 Jul 2023 17:37:28 +0200 Subject: leds: Provide devm_of_led_get_optional() Add an optional variant of devm_of_led_get(). It behaves the same as devm_of_led_get() except where the LED doesn't exist. In this case, instead of returning -ENOENT, the function returns NULL. 
Signed-off-by: Jean-Jacques Hiblot Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230728153731.3742339-2-jjhiblot@traphandler.com Signed-off-by: Lee Jones --- drivers/leds/led-class.c | 25 +++++++++++++++++++++++++ include/linux/leds.h | 2 ++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 4758da2b59cf..78068b06d009 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -402,6 +402,31 @@ void led_remove_lookup(struct led_lookup_data *led_lookup) } EXPORT_SYMBOL_GPL(led_remove_lookup); +/** + * devm_of_led_get_optional - Resource-managed request of an optional LED device + * @dev: LED consumer + * @index: index of the LED to obtain in the consumer + * + * The device node of the device is parsed to find the requested LED device. + * The LED device returned from this function is automatically released + * on driver detach. + * + * @return a pointer to a LED device, ERR_PTR(errno) on failure and NULL if the + * led was not found. 
+ */ +struct led_classdev *__must_check devm_of_led_get_optional(struct device *dev, + int index) +{ + struct led_classdev *led; + + led = devm_of_led_get(dev, index); + if (IS_ERR(led) && PTR_ERR(led) == -ENOENT) + return NULL; + + return led; +} +EXPORT_SYMBOL_GPL(devm_of_led_get_optional); + static int led_classdev_next_name(const char *init_name, char *name, size_t len) { diff --git a/include/linux/leds.h b/include/linux/leds.h index 7d428100b42b..8740b4e47f88 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -313,6 +313,8 @@ extern struct led_classdev *of_led_get(struct device_node *np, int index); extern void led_put(struct led_classdev *led_cdev); struct led_classdev *__must_check devm_of_led_get(struct device *dev, int index); +struct led_classdev *__must_check devm_of_led_get_optional(struct device *dev, + int index); /** * led_blink_set - set blinking with software fallback -- cgit v1.2.3 From c7d80059b086c4986cd994a1973ec7a5d75f8eea Mon Sep 17 00:00:00 2001 From: Jean-Jacques Hiblot Date: Fri, 28 Jul 2023 17:37:29 +0200 Subject: leds: class: Store the color index in struct led_classdev Store the color of the LED so that it is not lost after the LED's name has been composed. This color information can then be exposed to the user space or used by the LED consumer. Signed-off-by: Jean-Jacques Hiblot Link: https://lore.kernel.org/r/20230728153731.3742339-3-jjhiblot@traphandler.com Signed-off-by: Lee Jones --- Documentation/ABI/testing/sysfs-class-led | 9 +++++++++ drivers/leds/led-class.c | 21 +++++++++++++++++++++ include/linux/leds.h | 1 + 3 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-led b/Documentation/ABI/testing/sysfs-class-led index 2e24ac3bd7ef..b2ff0012c0f2 100644 --- a/Documentation/ABI/testing/sysfs-class-led +++ b/Documentation/ABI/testing/sysfs-class-led @@ -59,6 +59,15 @@ Description: brightness. 
Reading this file when no hw brightness change event has happened will return an ENODATA error. +What: /sys/class/leds//color +Date: June 2023 +KernelVersion: 6.5 +Description: + Color of the LED. + + This is a read-only file. Reading this file returns the color + of the LED as a string (e.g: "red", "green", "multicolor"). + What: /sys/class/leds//trigger Date: March 2006 KernelVersion: 2.6.17 diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 78068b06d009..4bcbd46ec75a 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -76,6 +76,19 @@ static ssize_t max_brightness_show(struct device *dev, } static DEVICE_ATTR_RO(max_brightness); +static ssize_t color_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const char *color_text = "invalid"; + struct led_classdev *led_cdev = dev_get_drvdata(dev); + + if (led_cdev->color < LED_COLOR_ID_MAX) + color_text = led_colors[led_cdev->color]; + + return sysfs_emit(buf, "%s\n", color_text); +} +static DEVICE_ATTR_RO(color); + #ifdef CONFIG_LEDS_TRIGGERS static BIN_ATTR(trigger, 0644, led_trigger_read, led_trigger_write, 0); static struct bin_attribute *led_trigger_bin_attrs[] = { @@ -90,6 +103,7 @@ static const struct attribute_group led_trigger_group = { static struct attribute *led_class_attrs[] = { &dev_attr_brightness.attr, &dev_attr_max_brightness.attr, + &dev_attr_color.attr, NULL, }; @@ -486,6 +500,10 @@ int led_classdev_register_ext(struct device *parent, fwnode_property_read_u32(init_data->fwnode, "max-brightness", &led_cdev->max_brightness); + + if (fwnode_property_present(init_data->fwnode, "color")) + fwnode_property_read_u32(init_data->fwnode, "color", + &led_cdev->color); } } else { proposed_name = led_cdev->name; @@ -495,6 +513,9 @@ int led_classdev_register_ext(struct device *parent, if (ret < 0) return ret; + if (led_cdev->color >= LED_COLOR_ID_MAX) + dev_warn(parent, "LED %s color identifier out of range\n", final_name); + 
mutex_init(&led_cdev->led_access); mutex_lock(&led_cdev->led_access); led_cdev->dev = device_create_with_groups(leds_class, parent, 0, diff --git a/include/linux/leds.h b/include/linux/leds.h index 8740b4e47f88..aa16dc2a8230 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -100,6 +100,7 @@ struct led_classdev { const char *name; unsigned int brightness; unsigned int max_brightness; + unsigned int color; int flags; /* Lower 16 bits reflect status */ -- cgit v1.2.3 From a1342c8027288e345cc5fd16c6800f9d4eb788ed Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 11 Aug 2023 04:51:14 +0000 Subject: KVM: Rename kvm_arch_flush_remote_tlb() to kvm_arch_flush_remote_tlbs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename kvm_arch_flush_remote_tlb() and the associated macro __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB to kvm_arch_flush_remote_tlbs() and __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS respectively. Making the name plural matches kvm_flush_remote_tlbs() and makes it more clear that this function can affect more than one remote TLB. No functional change intended. 
Signed-off-by: David Matlack Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Gavin Shan Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Shaoqin Huang Acked-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230811045127.3308641-2-rananta@google.com --- arch/mips/include/asm/kvm_host.h | 4 ++-- arch/mips/kvm/mips.c | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 04cedf9f8811..9b0ad8f3bf32 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -896,7 +896,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -int kvm_arch_flush_remote_tlb(struct kvm *kvm); +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +int kvm_arch_flush_remote_tlbs(struct kvm *kvm); #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index aa5583a7b05b..4b7bc39a4173 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -981,7 +981,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) } -int kvm_arch_flush_remote_tlb(struct kvm *kvm) +int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { kvm_mips_callbacks->prepare_flush_shadow(kvm); return 1; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 28bd38303d70..a2d3cfc2eb75 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1794,8 +1794,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void) #define __KVM_HAVE_ARCH_VM_FREE void kvm_arch_free_vm(struct kvm *kvm); -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int 
kvm_arch_flush_remote_tlb(struct kvm *kvm) +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { if (kvm_x86_ops.flush_remote_tlbs && !static_call(kvm_x86_flush_remote_tlbs)(kvm)) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d3ac7720da9..e3f968b38ae9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1479,8 +1479,8 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -ENOTSUPP; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dfbaafbe3a00..70e5479797ac 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -361,7 +361,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that * barrier here. */ - if (!kvm_arch_flush_remote_tlb(kvm) + if (!kvm_arch_flush_remote_tlbs(kvm) || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.generic.remote_tlb_flush; } -- cgit v1.2.3 From cfb0c08e80120928dda1e951718be135abd49bae Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Fri, 11 Aug 2023 04:51:15 +0000 Subject: KVM: Declare kvm_arch_flush_remote_tlbs() globally There's no reason for the architectures to declare kvm_arch_flush_remote_tlbs() in their own headers. Hence to avoid this duplication, make the declaration global, leaving the architectures to define only __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS as needed. 
Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Shaoqin Huang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230811045127.3308641-3-rananta@google.com --- arch/mips/include/asm/kvm_host.h | 1 - include/linux/kvm_host.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 9b0ad8f3bf32..54a85f1d4f2c 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -897,6 +897,5 @@ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS -int kvm_arch_flush_remote_tlbs(struct kvm *kvm); #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e3f968b38ae9..ade5d4500c2c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1484,6 +1484,8 @@ static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -ENOTSUPP; } +#else +int kvm_arch_flush_remote_tlbs(struct kvm *kvm); #endif #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA -- cgit v1.2.3 From d4788996051e3c07fadc6d9b214073fcf78810a8 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 11 Aug 2023 04:51:18 +0000 Subject: KVM: Allow range-based TLB invalidation from common code Make kvm_flush_remote_tlbs_range() visible in common code and create a default implementation that just invalidates the whole TLB. This paves the way for several future features/cleanups: - Introduction of range-based TLBI on ARM. - Eliminating kvm_arch_flush_remote_tlbs_memslot() - Moving the KVM/x86 TDP MMU to common code. No functional change intended. 
Signed-off-by: David Matlack Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Gavin Shan Reviewed-by: Shaoqin Huang Reviewed-by: Anup Patel Acked-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230811045127.3308641-6-rananta@google.com --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/mmu/mmu.c | 12 ++++-------- arch/x86/kvm/mmu/mmu_internal.h | 3 --- include/linux/kvm_host.h | 11 +++++++++++ virt/kvm/kvm_main.c | 13 +++++++++++++ 5 files changed, 30 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a2d3cfc2eb75..b547d17c58f6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1804,6 +1804,8 @@ static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) return -ENOTSUPP; } +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE + #define kvm_arch_pmi_in_guest(vcpu) \ ((vcpu) && (vcpu)->arch.handling_intr_from_guest) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index ec169f5c7dce..46ae672668e1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -278,16 +278,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void) return kvm_x86_ops.flush_remote_tlbs_range; } -void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, - gfn_t nr_pages) +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) { - int ret = -EOPNOTSUPP; + if (!kvm_x86_ops.flush_remote_tlbs_range) + return -EOPNOTSUPP; - if (kvm_x86_ops.flush_remote_tlbs_range) - ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn, - nr_pages); - if (ret) - kvm_flush_remote_tlbs(kvm); + return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages); } static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index d39af5639ce9..86cb83bb3480 100644 --- 
a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -170,9 +170,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level); -void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, - gfn_t nr_pages); - /* Flush the given page (huge or not) of guest memory. */ static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ade5d4500c2c..89d2614e4b7a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1359,6 +1359,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); +void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); @@ -1488,6 +1489,16 @@ static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) int kvm_arch_flush_remote_tlbs(struct kvm *kvm); #endif +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE +static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, + gfn_t gfn, u64 nr_pages) +{ + return -EOPNOTSUPP; +} +#else +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); +#endif + #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA void kvm_arch_register_noncoherent_dma(struct kvm *kvm); void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d6b050786155..26e91000f579 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -366,6 +366,19 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); +void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) +{ + if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages)) + return; + + /* + * Fall back to a flushing entire TLBs 
if the architecture range-based + * TLB invalidation is unsupported or can't be performed for whatever + * reason. + */ + kvm_flush_remote_tlbs(kvm); +} + static void kvm_flush_shadow_all(struct kvm *kvm) { kvm_arch_flush_shadow_all(kvm); -- cgit v1.2.3 From 619b5072443c05cf18c31b2c0320cdb42396d411 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 11 Aug 2023 04:51:19 +0000 Subject: KVM: Move kvm_arch_flush_remote_tlbs_memslot() to common code Move kvm_arch_flush_remote_tlbs_memslot() to common code and drop "arch_" from the name. kvm_arch_flush_remote_tlbs_memslot() is just a range-based TLB invalidation where the range is defined by the memslot. Now that kvm_flush_remote_tlbs_range() can be called from common code we can just use that and drop a bunch of duplicate code from the arch directories. Note this adds a lockdep assertion for slots_lock being held when calling kvm_flush_remote_tlbs_memslot(), which was previously only asserted on x86. MIPS has calls to kvm_flush_remote_tlbs_memslot(), but they all hold the slots_lock, so the lockdep assertion continues to hold true. Also drop the CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT ifdef gating kvm_flush_remote_tlbs_memslot(), since it is no longer necessary. 
Signed-off-by: David Matlack Signed-off-by: Raghavendra Rao Ananta Reviewed-by: Gavin Shan Reviewed-by: Shaoqin Huang Acked-by: Anup Patel Acked-by: Sean Christopherson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230811045127.3308641-7-rananta@google.com --- arch/arm64/kvm/arm.c | 6 ------ arch/mips/kvm/mips.c | 10 ++-------- arch/riscv/kvm/mmu.c | 6 ------ arch/x86/kvm/mmu/mmu.c | 16 +--------------- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 7 +++---- virt/kvm/kvm_main.c | 18 ++++++++++++++++-- 7 files changed, 23 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 72dc53a75d1c..fd2af63d788d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1532,12 +1532,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - kvm_flush_remote_tlbs(kvm); -} - static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr) { diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 4b7bc39a4173..231ac052b506 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, /* Flush slot from GPA */ kvm_mips_flush_gpa_pt(kvm, slot->base_gfn, slot->base_gfn + slot->npages - 1); - kvm_arch_flush_remote_tlbs_memslot(kvm, slot); + kvm_flush_remote_tlbs_memslot(kvm, slot); spin_unlock(&kvm->mmu_lock); } @@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn, new->base_gfn + new->npages - 1); if (needs_flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, new); + kvm_flush_remote_tlbs_memslot(kvm, new); spin_unlock(&kvm->mmu_lock); } } @@ -987,12 +987,6 @@ int kvm_arch_flush_remote_tlbs(struct kvm *kvm) return 1; } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - 
const struct kvm_memory_slot *memslot) -{ - kvm_flush_remote_tlbs(kvm); -} - int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { int r; diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index f2eb47925806..97e129620686 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - kvm_flush_remote_tlbs(kvm); -} - void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free) { } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 46ae672668e1..dbf3c6c2316c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6666,7 +6666,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm, */ if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true)) - kvm_arch_flush_remote_tlbs_memslot(kvm, slot); + kvm_flush_remote_tlbs_memslot(kvm, slot); } void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, @@ -6685,20 +6685,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, } } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - /* - * All current use cases for flushing the TLBs for a specific memslot - * related to dirty logging, and many do the TLB flush out of mmu_lock. - * The interaction between the various operations on memslot must be - * serialized by slots_locks to ensure the TLB flush from one operation - * is observed by any other operation on the same memslot. 
- */ - lockdep_assert_held(&kvm->slots_lock); - kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages); -} - void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a6b9bea62fb8..faeb2e307b36 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12751,7 +12751,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, * See is_writable_pte() for more details (the case involving * access-tracked SPTEs is particularly relevant). */ - kvm_arch_flush_remote_tlbs_memslot(kvm, new); + kvm_flush_remote_tlbs_memslot(kvm, new); } } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 89d2614e4b7a..394db2ce11e2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1360,6 +1360,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); +void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); @@ -1388,10 +1390,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, unsigned long mask); void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot); -#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty, struct kvm_memory_slot **memslot); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 26e91000f579..5d4d2e051aa0 100644 --- a/virt/kvm/kvm_main.c +++ 
b/virt/kvm/kvm_main.c @@ -379,6 +379,20 @@ void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) kvm_flush_remote_tlbs(kvm); } +void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + /* + * All current use cases for flushing the TLBs for a specific memslot + * are related to dirty logging, and many do the TLB flush out of + * mmu_lock. The interaction between the various operations on memslot + * must be serialized by slots_locks to ensure the TLB flush from one + * operation is observed by any other operation on the same memslot. + */ + lockdep_assert_held(&kvm->slots_lock); + kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages); +} + static void kvm_flush_shadow_all(struct kvm *kvm) { kvm_arch_flush_shadow_all(kvm); @@ -2191,7 +2205,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) } if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + kvm_flush_remote_tlbs_memslot(kvm, memslot); if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) return -EFAULT; @@ -2308,7 +2322,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm, KVM_MMU_UNLOCK(kvm); if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + kvm_flush_remote_tlbs_memslot(kvm, memslot); return 0; } -- cgit v1.2.3 From 3e1efe2b67d3d38116ec010968dbcd89d29e4561 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 28 Jul 2023 17:41:44 -0700 Subject: KVM: Wrap kvm_{gfn,hva}_range.pte in a per-action union Wrap kvm_{gfn,hva}_range.pte in a union so that future notifier events can pass event specific information up and down the stack without needing to constantly expand and churn the APIs. Lockless aging of SPTEs will pass around a bitmap, and support for memory attributes will pass around the new attributes for the range. Add a "KVM_MMU_NOTIFIER_NO_ARG" placeholder to simplify handling events without an argument (creating a dummy union variable is mildly annoying).
Opportunistically drop explicit zero-initialization of the "pte" field, as omitting the field (now a union) has the same effect. Cc: Yu Zhao Link: https://lore.kernel.org/all/CAOUHufagkd2Jk3_HrVoFFptRXM=hX2CV8f+M-dka-hJU4bP8kw@mail.gmail.com Reviewed-by: Oliver Upton Acked-by: Yu Zhao Link: https://lore.kernel.org/r/20230729004144.1054885-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/arm64/kvm/mmu.c | 2 +- arch/mips/kvm/mmu.c | 2 +- arch/riscv/kvm/mmu.c | 2 +- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/mmu/tdp_mmu.c | 6 +++--- include/linux/kvm_host.h | 6 +++++- virt/kvm/kvm_main.c | 19 ++++++++++--------- 7 files changed, 22 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 6db9ef288ec3..55f03a68f1cd 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1721,7 +1721,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { - kvm_pfn_t pfn = pte_pfn(range->pte); + kvm_pfn_t pfn = pte_pfn(range->arg.pte); if (!kvm->arch.mmu.pgt) return false; diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index e8c08988ed37..7b2ac1319d70 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -447,7 +447,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { gpa_t gpa = range->start << PAGE_SHIFT; - pte_t hva_pte = range->pte; + pte_t hva_pte = range->arg.pte; pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); pte_t old_pte; diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index f2eb47925806..857f4312b0f8 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -559,7 +559,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { int ret; - kvm_pfn_t pfn = pte_pfn(range->pte); + kvm_pfn_t
pfn = pte_pfn(range->arg.pte); if (!kvm->arch.pgd) return false; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index ec169f5c7dce..d72f2b20f430 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1588,7 +1588,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm, for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, range->start, range->end - 1, &iterator) ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn, - iterator.level, range->pte); + iterator.level, range->arg.pte); return ret; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 512163d52194..6250bd3d20c1 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1241,7 +1241,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte; /* Huge pages aren't expected to be modified without first being zapped. */ - WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end); + WARN_ON(pte_huge(range->arg.pte) || range->start + 1 != range->end); if (iter->level != PG_LEVEL_4K || !is_shadow_present_pte(iter->old_spte)) @@ -1255,9 +1255,9 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter, */ tdp_mmu_iter_set_spte(kvm, iter, 0); - if (!pte_write(range->pte)) { + if (!pte_write(range->arg.pte)) { new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte, - pte_pfn(range->pte)); + pte_pfn(range->arg.pte)); tdp_mmu_iter_set_spte(kvm, iter, new_spte); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d3ac7720da9..9125d0ab642d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -256,11 +256,15 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif #ifdef KVM_ARCH_WANT_MMU_NOTIFIER +union kvm_mmu_notifier_arg { + pte_t pte; +}; + struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; - pte_t pte; + union kvm_mmu_notifier_arg arg; bool may_block; }; bool kvm_unmap_gfn_range(struct 
kvm *kvm, struct kvm_gfn_range *range); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dfbaafbe3a00..92c50dc159e8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -526,7 +526,7 @@ typedef void (*on_unlock_fn_t)(struct kvm *kvm); struct kvm_hva_range { unsigned long start; unsigned long end; - pte_t pte; + union kvm_mmu_notifier_arg arg; hva_handler_t handler; on_lock_fn_t on_lock; on_unlock_fn_t on_unlock; @@ -547,6 +547,8 @@ static void kvm_null_fn(void) } #define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn) +static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG; + /* Iterate over each memslot intersecting [start, last] (inclusive) range */ #define kvm_for_each_memslot_in_hva_range(node, slots, start, last) \ for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \ @@ -591,7 +593,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm, * bother making these conditional (to avoid writes on * the second or later invocation of the handler). 
*/ - gfn_range.pte = range->pte; + gfn_range.arg = range->arg; gfn_range.may_block = range->may_block; /* @@ -632,14 +634,14 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm, static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn, unsigned long start, unsigned long end, - pte_t pte, + union kvm_mmu_notifier_arg arg, hva_handler_t handler) { struct kvm *kvm = mmu_notifier_to_kvm(mn); const struct kvm_hva_range range = { .start = start, .end = end, - .pte = pte, + .arg = arg, .handler = handler, .on_lock = (void *)kvm_null_fn, .on_unlock = (void *)kvm_null_fn, @@ -659,7 +661,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn const struct kvm_hva_range range = { .start = start, .end = end, - .pte = __pte(0), .handler = handler, .on_lock = (void *)kvm_null_fn, .on_unlock = (void *)kvm_null_fn, @@ -693,6 +694,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, pte_t pte) { struct kvm *kvm = mmu_notifier_to_kvm(mn); + const union kvm_mmu_notifier_arg arg = { .pte = pte }; trace_kvm_set_spte_hva(address); @@ -708,7 +710,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) return; - kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn); + kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn); } void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, @@ -747,7 +749,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, const struct kvm_hva_range hva_range = { .start = range->start, .end = range->end, - .pte = __pte(0), .handler = kvm_unmap_gfn_range, .on_lock = kvm_mmu_invalidate_begin, .on_unlock = kvm_arch_guest_memory_reclaimed, @@ -812,7 +813,6 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, const struct kvm_hva_range hva_range = { .start = range->start, .end = range->end, - .pte = __pte(0), .handler = (void 
*)kvm_null_fn, .on_lock = kvm_mmu_invalidate_end, .on_unlock = (void *)kvm_null_fn, @@ -845,7 +845,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, { trace_kvm_age_hva(start, end); - return kvm_handle_hva_range(mn, start, end, __pte(0), kvm_age_gfn); + return kvm_handle_hva_range(mn, start, end, KVM_MMU_NOTIFIER_NO_ARG, + kvm_age_gfn); } static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, -- cgit v1.2.3 From 1f8403953f05af591ab72cf749b9b9b837ea9595 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 14 Aug 2023 22:03:39 +0800 Subject: KVM: Remove unused kvm_device_{get,put}() declarations Commit 07f0a7bdec5c ("kvm: destroy emulated devices on VM exit") removed the functions but not these declarations. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230814140339.47732-1-yuehaibing@huawei.com [sean: split to separate patch] Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9125d0ab642d..a973a7cb45e0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2152,8 +2152,6 @@ struct kvm_device_ops { int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); }; -void kvm_device_get(struct kvm_device *dev); -void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); -- cgit v1.2.3 From 458933d33af2cb3663bd8c0080c1efd1f9483db4 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 14 Aug 2023 22:03:39 +0800 Subject: KVM: Remove unused kvm_make_cpus_request_mask() declaration Commit 7ee30bc132c6 ("KVM: x86: deliver KVM IOAPIC scan request to target vCPUs") declared but never implemented kvm_make_cpus_request_mask() as kvm_make_vcpus_request_mask() already existed. 
Note, KVM's APIs are painfully inconsistent, as the inclusive variant uses "vcpus", whereas the exclusive/all variants use "cpus", which is likely what led to the spurious declaration. The "vcpus" terminology is more correct, especially since the helpers will kick _physical_ CPUs by calling kvm_kick_many_cpus(). But that's a cleanup for the future. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230814140339.47732-1-yuehaibing@huawei.com [sean: split to separate patch, call out inconsistent naming] Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a973a7cb45e0..199698a5ffa6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -190,8 +190,6 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, struct kvm_vcpu *except); -bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, - unsigned long *vcpu_bitmap); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From 2459f4dfe5529f8b847f452473e2da08a8fc9fe5 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 6 Jul 2023 19:39:39 +0800 Subject: mfd: hi655x-pmic: Convert to devm_platform_ioremap_resource() Use devm_platform_ioremap_resource() to simplify code. 
Signed-off-by: Yangtao Li Link: https://lore.kernel.org/r/20230706113939.1178-7-frank.li@vivo.com Signed-off-by: Lee Jones --- drivers/mfd/hi655x-pmic.c | 3 +-- include/linux/mfd/hi655x-pmic.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/hi655x-pmic.c b/drivers/mfd/hi655x-pmic.c index a58e42ddcd0c..98ae40ee3f05 100644 --- a/drivers/mfd/hi655x-pmic.c +++ b/drivers/mfd/hi655x-pmic.c @@ -100,8 +100,7 @@ static int hi655x_pmic_probe(struct platform_device *pdev) return -ENOMEM; pmic->dev = dev; - pmic->res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(dev, pmic->res); + base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) return PTR_ERR(base); diff --git a/include/linux/mfd/hi655x-pmic.h b/include/linux/mfd/hi655x-pmic.h index 6a012784dd1b..194556851ccf 100644 --- a/include/linux/mfd/hi655x-pmic.h +++ b/include/linux/mfd/hi655x-pmic.h @@ -52,7 +52,6 @@ #define OTMP_D1R_INT_MASK BIT(OTMP_D1R_INT) struct hi655x_pmic { - struct resource *res; struct device *dev; struct regmap *regmap; struct gpio_desc *gpio; -- cgit v1.2.3 From 2dfe293bcde2d302c045d0cd536ccb15f86385d2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 20 Jul 2023 22:37:38 +0800 Subject: mfd: db8500-prcmu: Remove unused inline functions Since commit b0e846248de5 ("mfd: db8500-prcmu: Remove dead code for a non-existing config") these inline helpers are no longer needed either.
Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20230720143738.13996-1-yuehaibing@huawei.com Signed-off-by: Lee Jones --- include/linux/mfd/dbx500-prcmu.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index e7a7e70fdb38..dd0fc891b228 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -556,16 +556,6 @@ static inline void prcmu_clear(unsigned int reg, u32 bits) #define PRCMU_QOS_ARM_OPP 3 #define PRCMU_QOS_DEFAULT_VALUE -1 -static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void) -{ - return 0; -} - -static inline int prcmu_qos_requirement(int prcmu_qos_class) -{ - return 0; -} - static inline int prcmu_qos_add_requirement(int prcmu_qos_class, char *name, s32 value) { @@ -582,15 +572,4 @@ static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name) { } -static inline int prcmu_qos_add_notifier(int prcmu_qos_class, - struct notifier_block *notifier) -{ - return 0; -} -static inline int prcmu_qos_remove_notifier(int prcmu_qos_class, - struct notifier_block *notifier) -{ - return 0; -} - #endif /* __MACH_PRCMU_H */ -- cgit v1.2.3 From 10d3340441bd0db857fc7fcb1733a800acf47a3d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jul 2023 11:02:23 +0200 Subject: mfd: rz-mtu3: Link time dependencies The new set of drivers for RZ/G2L MTU3a tries to enable compile-testing the individual client drivers even when the MFD portion is disabled but gets it wrong, causing a link failure when the core is in a loadable module but the other drivers are built-in: x86_64-linux-ld: drivers/pwm/pwm-rz-mtu3.o: in function `rz_mtu3_pwm_apply': pwm-rz-mtu3.c:(.text+0x4bf): undefined reference to `rz_mtu3_8bit_ch_write' x86_64-linux-ld: pwm-rz-mtu3.c:(.text+0x509): undefined reference to `rz_mtu3_disable' arm-linux-gnueabi-ld: drivers/counter/rz-mtu3-cnt.o: in function 
`rz_mtu3_cascade_counts_enable_get': rz-mtu3-cnt.c:(.text+0xbec): undefined reference to `rz_mtu3_shared_reg_read' It seems better not to add the extra complexity here but instead just use a normal hard dependency, so remove the #else portion in the header along with the "|| COMPILE_TEST". This could also be fixed by having slightly more elaborate Kconfig dependencies or using the cursed 'IS_REACHABLE()' helper, but in practice it's already possible to compile-test all these drivers by enabling the MFD portion. Fixes: 254d3a727421c ("pwm: Add Renesas RZ/G2L MTU3a PWM driver") Fixes: 0be8907359df4 ("counter: Add Renesas RZ/G2L MTU3a counter driver") Fixes: 654c293e1687b ("mfd: Add Renesas RZ/G2L MTU3a core driver") Signed-off-by: Arnd Bergmann Acked-by: Thierry Reding Reviewed-by: Biju Das Link: https://lore.kernel.org/r/20230719090430.1925182-1-arnd@kernel.org Signed-off-by: Lee Jones --- drivers/counter/Kconfig | 2 +- drivers/pwm/Kconfig | 2 +- include/linux/mfd/rz-mtu3.h | 66 --------------------------------------------- 3 files changed, 2 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/drivers/counter/Kconfig b/drivers/counter/Kconfig index bca21df51168..62f118173355 100644 --- a/drivers/counter/Kconfig +++ b/drivers/counter/Kconfig @@ -92,7 +92,7 @@ config MICROCHIP_TCB_CAPTURE config RZ_MTU3_CNT tristate "Renesas RZ/G2L MTU3a counter driver" - depends on RZ_MTU3 || COMPILE_TEST + depends on RZ_MTU3 help Enable support for MTU3a counter driver found on Renesas RZ/G2L alike SoCs.
This IP supports both 16-bit and 32-bit phase counting mode diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 6210babb0741..8ebcddf91f7b 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -505,7 +505,7 @@ config PWM_ROCKCHIP config PWM_RZ_MTU3 tristate "Renesas RZ/G2L MTU3a PWM Timer support" - depends on RZ_MTU3 || COMPILE_TEST + depends on RZ_MTU3 depends on HAS_IOMEM help This driver exposes the MTU3a PWM Timer controller found in Renesas diff --git a/include/linux/mfd/rz-mtu3.h b/include/linux/mfd/rz-mtu3.h index c5173bc06270..8421d49500bf 100644 --- a/include/linux/mfd/rz-mtu3.h +++ b/include/linux/mfd/rz-mtu3.h @@ -151,7 +151,6 @@ struct rz_mtu3 { void *priv_data; }; -#if IS_ENABLED(CONFIG_RZ_MTU3) static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch) { mutex_lock(&ch->lock); @@ -188,70 +187,5 @@ void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val); void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val); void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, u16 off, u16 pos, u8 val); -#else -static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch) -{ - return false; -} - -static inline void rz_mtu3_release_channel(struct rz_mtu3_channel *ch) -{ -} - -static inline bool rz_mtu3_is_enabled(struct rz_mtu3_channel *ch) -{ - return false; -} - -static inline void rz_mtu3_disable(struct rz_mtu3_channel *ch) -{ -} - -static inline int rz_mtu3_enable(struct rz_mtu3_channel *ch) -{ - return 0; -} - -static inline u8 rz_mtu3_8bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u16 rz_mtu3_16bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u32 rz_mtu3_32bit_ch_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline u16 rz_mtu3_shared_reg_read(struct rz_mtu3_channel *ch, u16 off) -{ - return 0; -} - -static inline void rz_mtu3_8bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u8 
val) -{ -} - -static inline void rz_mtu3_16bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u16 val) -{ -} - -static inline void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val) -{ -} - -static inline void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val) -{ -} - -static inline void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, - u16 off, u16 pos, u8 val) -{ -} -#endif #endif /* __MFD_RZ_MTU3_H__ */ -- cgit v1.2.3 From e0d77323824054f15f3137e890f7addc48198a3e Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 28 Jul 2023 21:27:09 +0800 Subject: mfd: max77686: Remove unused extern declarations max77686_irq_init() and max77686_irq_exit() have not been used since commit 6f1c1e71d933 ("mfd: max77686: Convert to use regmap_irq"). And max77686_irq_resume() has never been implemented since it was introduced in commit dae8a969d512 ("mfd: Add Maxim 77686 driver"). Signed-off-by: Yue Haibing Reviewed-by: Krzysztof Kozlowski Reviewed-by: Chanwoo Choi Link: https://lore.kernel.org/r/20230728132709.27052-1-yuehaibing@huawei.com Signed-off-by: Lee Jones --- include/linux/mfd/max77686-private.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index 3acceeedbaba..ea635d12a741 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -441,8 +441,4 @@ enum max77686_types { TYPE_MAX77802, }; -extern int max77686_irq_init(struct max77686_dev *max77686); -extern void max77686_irq_exit(struct max77686_dev *max77686); -extern int max77686_irq_resume(struct max77686_dev *max77686); - #endif /* __LINUX_MFD_MAX77686_PRIV_H */ -- cgit v1.2.3 From 733e2e9a28e6fa109e51e0b77901552f69df0ef1 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 28 Jul 2023 21:24:39 +0800 Subject: mfd: ab8500: Remove unused extern declarations Commit d28f1db8187d ("mfd: Remove confusing ab8500-i2c file and merge into ab8500-core")
left these declarations behind. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230728132439.31568-1-yuehaibing@huawei.com Signed-off-by: Lee Jones --- include/linux/mfd/abx500/ab8500.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index 302a330c5c84..09fb3c56e7d7 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -382,10 +382,6 @@ struct ab8500_platform_data { struct ab8500_sysctrl_platform_data *sysctrl; }; -extern int ab8500_init(struct ab8500 *ab8500, - enum ab8500_version version); -extern int ab8500_exit(struct ab8500 *ab8500); - extern int ab8500_suspend(struct ab8500 *ab8500); static inline int is_ab8500(struct ab8500 *ab) -- cgit v1.2.3 From 54ab43a957bcb2643c13df5ab71de9dc3f72e5a6 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 28 Jul 2023 21:28:41 +0800 Subject: mfd: 88pm860x: Remove unused extern declarations commit 260a127bfbeb ("mfd: 88pm860x-i2c: Purge unused functions") left these declarations behind.
Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230728132841.10648-1-yuehaibing@huawei.com Signed-off-by: Lee Jones --- include/linux/mfd/88pm860x.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index 473545a2c425..6fa21791fc85 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -472,13 +472,7 @@ extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *); extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *); extern int pm860x_set_bits(struct i2c_client *, int, unsigned char, unsigned char); -extern int pm860x_page_reg_read(struct i2c_client *, int); extern int pm860x_page_reg_write(struct i2c_client *, int, unsigned char); extern int pm860x_page_bulk_read(struct i2c_client *, int, int, unsigned char *); -extern int pm860x_page_bulk_write(struct i2c_client *, int, int, - unsigned char *); -extern int pm860x_page_set_bits(struct i2c_client *, int, unsigned char, - unsigned char); - #endif /* __LINUX_MFD_88PM860X_H */ -- cgit v1.2.3 From 875386b98857822b77ac7f95bdf367b70af5b78c Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Mon, 21 Aug 2023 18:30:37 +0530 Subject: scsi: qla2xxx: Add Unsolicited LS Request and Response Support for NVMe Introduce infrastructure in the driver to support the processing of unsolicited LS (Link Service) requests. This will involve the utilization of a new pass-up of unsolicited FC-NVMe request IOCB interface. Unsolicited requests will be submitted to the NVMe transport layer through nvme_fc_rcv_ls_req(). Any received LS responses, which are sent using xmt_ls_rsp(), will be forwarded to the firmware through the existing Pass-Through IOCB interface, responsible for sending FC-NVMe Link Service requests and responses. 
Signed-off-by: Manish Rangankar Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230821130045.34850-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.c | 5 +- drivers/scsi/qla2xxx/qla_dbg.h | 1 + drivers/scsi/qla2xxx/qla_def.h | 34 +++- drivers/scsi/qla2xxx/qla_gbl.h | 14 +- drivers/scsi/qla2xxx/qla_init.c | 1 + drivers/scsi/qla2xxx/qla_iocb.c | 27 ++- drivers/scsi/qla2xxx/qla_isr.c | 146 ++++++++++++++- drivers/scsi/qla2xxx/qla_nvme.c | 401 +++++++++++++++++++++++++++++++++++++++- drivers/scsi/qla2xxx/qla_nvme.h | 17 +- drivers/scsi/qla2xxx/qla_os.c | 24 ++- include/linux/nvme-fc-driver.h | 6 +- 11 files changed, 642 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index d7e8454304ce..4d104425146b 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -12,9 +12,8 @@ * ---------------------------------------------------------------------- * | Module Init and Probe | 0x0199 | | * | Mailbox commands | 0x1206 | 0x11a5-0x11ff | - * | Device Discovery | 0x2134 | 0x210e-0x2115 | - * | | | 0x211c-0x2128 | - * | | | 0x212c-0x2134 | + * | Device Discovery | 0x2134 | 0x2112-0x2115 | + * | | | 0x2127-0x2128 | * | Queue Command and IO tracing | 0x3074 | 0x300b | * | | | 0x3027-0x3028 | * | | | 0x303d-0x3041 | diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index 70482b55d240..54f0a412226f 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -368,6 +368,7 @@ ql_log_qp(uint32_t, struct qla_qpair *, int32_t, const char *fmt, ...); #define ql_dbg_tgt_tmr 0x00001000 /* Target mode task management */ #define ql_dbg_tgt_dif 0x00000800 /* Target mode dif */ #define ql_dbg_edif 0x00000400 /* edif and purex debug */ +#define ql_dbg_unsol 0x00000100 /* Unsolicited path debug */ extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, 
uint32_t, uint32_t *, uint32_t, void **); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 97ecb2158c8a..2007d5bb5f9f 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -346,6 +346,12 @@ struct name_list_extended { u8 sent; }; +struct qla_nvme_fc_rjt { + struct fcnvme_ls_rjt *c; + dma_addr_t cdma; + u16 size; +}; + struct els_reject { struct fc_els_ls_rjt *c; dma_addr_t cdma; @@ -503,6 +509,20 @@ struct ct_arg { port_id_t id; }; +struct qla_nvme_lsrjt_pt_arg { + struct fc_port *fcport; + u8 opcode; + u8 vp_idx; + u8 reason; + u8 explanation; + __le16 nport_handle; + u16 control_flags; + __le16 ox_id; + __le32 xchg_address; + u32 tx_byte_count, rx_byte_count; + dma_addr_t tx_addr, rx_addr; +}; + /* * SRB extensions. */ @@ -611,13 +631,16 @@ struct srb_iocb { void *desc; /* These are only used with ls4 requests */ - int cmd_len; - int rsp_len; + __le32 cmd_len; + __le32 rsp_len; dma_addr_t cmd_dma; dma_addr_t rsp_dma; enum nvmefc_fcp_datadir dir; uint32_t dl; uint32_t timeout_sec; + __le32 exchange_address; + __le16 nport_handle; + __le16 ox_id; struct list_head entry; } nvme; struct { @@ -707,6 +730,10 @@ typedef struct srb { struct fc_port *fcport; struct scsi_qla_host *vha; unsigned int start_timer:1; + unsigned int abort:1; + unsigned int aborted:1; + unsigned int completed:1; + unsigned int unsol_rsp:1; uint32_t handle; uint16_t flags; @@ -2542,6 +2569,7 @@ enum rscn_addr_format { typedef struct fc_port { struct list_head list; struct scsi_qla_host *vha; + struct list_head unsol_ctx_head; unsigned int conf_compl_supported:1; unsigned int deleted:2; @@ -4802,6 +4830,7 @@ struct qla_hw_data { struct els_reject elsrej; u8 edif_post_stop_cnt_down; struct qla_vp_map *vp_map; + struct qla_nvme_fc_rjt lsrjt; }; #define RX_ELS_SIZE (roundup(sizeof(struct enode) + ELS_MAX_PAYLOAD, SMP_CACHE_BYTES)) @@ -4834,6 +4863,7 @@ struct active_regions { * is variable) starting at "iocb". 
*/ struct purex_item { + void *purls_context; struct list_head list; struct scsi_qla_host *vha; void (*process_item)(struct scsi_qla_host *vha, diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 33fba9d62969..911e9adf41d3 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -611,7 +611,11 @@ qla2xxx_msix_rsp_q_hs(int irq, void *dev_id); fc_port_t *qla2x00_find_fcport_by_loopid(scsi_qla_host_t *, uint16_t); fc_port_t *qla2x00_find_fcport_by_wwpn(scsi_qla_host_t *, u8 *, u8); fc_port_t *qla2x00_find_fcport_by_nportid(scsi_qla_host_t *, port_id_t *, u8); -void __qla_consume_iocb(struct scsi_qla_host *vha, void **pkt, struct rsp_que **rsp); +void qla24xx_queue_purex_item(scsi_qla_host_t *, struct purex_item *, + void (*process_item)(struct scsi_qla_host *, + struct purex_item *)); +void __qla_consume_iocb(struct scsi_qla_host *, void **, struct rsp_que **); +void qla2xxx_process_purls_iocb(void **pkt, struct rsp_que **rsp); /* * Global Function Prototypes in qla_sup.c source file. 
@@ -674,9 +678,11 @@ extern int qla2xxx_get_vpd_field(scsi_qla_host_t *, char *, char *, size_t); extern void qla2xxx_flash_npiv_conf(scsi_qla_host_t *); extern int qla24xx_read_fcp_prio_cfg(scsi_qla_host_t *); extern int qla2x00_mailbox_passthru(struct bsg_job *bsg_job); -int __qla_copy_purex_to_buffer(struct scsi_qla_host *vha, void **pkt, - struct rsp_que **rsp, u8 *buf, u32 buf_len); - +int qla2x00_sys_ld_info(struct bsg_job *bsg_job); +int __qla_copy_purex_to_buffer(struct scsi_qla_host *, void **, + struct rsp_que **, u8 *, u32); +struct purex_item *qla27xx_copy_multiple_pkt(struct scsi_qla_host *vha, + void **pkt, struct rsp_que **rsp, bool is_purls, bool byte_order); int qla_mailbox_passthru(scsi_qla_host_t *vha, uint16_t *mbx_in, uint16_t *mbx_out); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 53e76bbee915..27644957ae4e 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5554,6 +5554,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) INIT_WORK(&fcport->reg_work, qla_register_fcport_fn); INIT_LIST_HEAD(&fcport->gnl_entry); INIT_LIST_HEAD(&fcport->list); + INIT_LIST_HEAD(&fcport->unsol_ctx_head); INIT_LIST_HEAD(&fcport->sess_cmd_list); spin_lock_init(&fcport->sess_cmd_lock); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index d0b12c746a8a..7fbd917f6e1f 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -3771,21 +3771,28 @@ qla_nvme_ls(srb_t *sp, struct pt_ls4_request *cmd_pkt) nvme = &sp->u.iocb_cmd; cmd_pkt->entry_type = PT_LS4_REQUEST; cmd_pkt->entry_count = 1; - cmd_pkt->control_flags = cpu_to_le16(CF_LS4_ORIGINATOR << CF_LS4_SHIFT); - cmd_pkt->timeout = cpu_to_le16(nvme->u.nvme.timeout_sec); - cmd_pkt->nport_handle = cpu_to_le16(sp->fcport->loop_id); cmd_pkt->vp_index = sp->fcport->vha->vp_idx; + if (sp->unsol_rsp) { + cmd_pkt->control_flags = + cpu_to_le16(CF_LS4_RESPONDER << CF_LS4_SHIFT); + 
cmd_pkt->nport_handle = nvme->u.nvme.nport_handle; + cmd_pkt->exchange_address = nvme->u.nvme.exchange_address; + } else { + cmd_pkt->control_flags = + cpu_to_le16(CF_LS4_ORIGINATOR << CF_LS4_SHIFT); + cmd_pkt->nport_handle = cpu_to_le16(sp->fcport->loop_id); + cmd_pkt->rx_dseg_count = cpu_to_le16(1); + cmd_pkt->rx_byte_count = nvme->u.nvme.rsp_len; + cmd_pkt->dsd[1].length = nvme->u.nvme.rsp_len; + put_unaligned_le64(nvme->u.nvme.rsp_dma, &cmd_pkt->dsd[1].address); + } + cmd_pkt->tx_dseg_count = cpu_to_le16(1); - cmd_pkt->tx_byte_count = cpu_to_le32(nvme->u.nvme.cmd_len); - cmd_pkt->dsd[0].length = cpu_to_le32(nvme->u.nvme.cmd_len); + cmd_pkt->tx_byte_count = nvme->u.nvme.cmd_len; + cmd_pkt->dsd[0].length = nvme->u.nvme.cmd_len; put_unaligned_le64(nvme->u.nvme.cmd_dma, &cmd_pkt->dsd[0].address); - - cmd_pkt->rx_dseg_count = cpu_to_le16(1); - cmd_pkt->rx_byte_count = cpu_to_le32(nvme->u.nvme.rsp_len); - cmd_pkt->dsd[1].length = cpu_to_le32(nvme->u.nvme.rsp_len); - put_unaligned_le64(nvme->u.nvme.rsp_dma, &cmd_pkt->dsd[1].address); } static void diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 1f42a413b598..867025c89909 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -823,6 +823,135 @@ qla83xx_handle_8200_aen(scsi_qla_host_t *vha, uint16_t *mb) } } +/** + * qla27xx_copy_multiple_pkt() - Copy over purex/purls packets that can + * span over multiple IOCBs. 
+ * @vha: SCSI driver HA context + * @pkt: ELS packet + * @rsp: Response queue + * @is_purls: True, for Unsolicited Received FC-NVMe LS rsp IOCB + * false, for Unsolicited Received ELS IOCB + * @byte_order: True, to change the byte ordering of iocb payload + */ +struct purex_item * +qla27xx_copy_multiple_pkt(struct scsi_qla_host *vha, void **pkt, + struct rsp_que **rsp, bool is_purls, + bool byte_order) +{ + struct purex_entry_24xx *purex = NULL; + struct pt_ls4_rx_unsol *purls = NULL; + struct rsp_que *rsp_q = *rsp; + sts_cont_entry_t *new_pkt; + uint16_t no_bytes = 0, total_bytes = 0, pending_bytes = 0; + uint16_t buffer_copy_offset = 0, payload_size = 0; + uint16_t entry_count, entry_count_remaining; + struct purex_item *item; + void *iocb_pkt = NULL; + + if (is_purls) { + purls = *pkt; + total_bytes = (le16_to_cpu(purls->frame_size) & 0x0FFF) - + PURX_ELS_HEADER_SIZE; + entry_count = entry_count_remaining = purls->entry_count; + payload_size = sizeof(purls->payload); + } else { + purex = *pkt; + total_bytes = (le16_to_cpu(purex->frame_size) & 0x0FFF) - + PURX_ELS_HEADER_SIZE; + entry_count = entry_count_remaining = purex->entry_count; + payload_size = sizeof(purex->els_frame_payload); + } + + pending_bytes = total_bytes; + no_bytes = (pending_bytes > payload_size) ? payload_size : + pending_bytes; + ql_dbg(ql_dbg_async, vha, 0x509a, + "%s LS, frame_size 0x%x, entry count %d\n", + (is_purls ? 
"PURLS" : "FPIN"), total_bytes, entry_count); + + item = qla24xx_alloc_purex_item(vha, total_bytes); + if (!item) + return item; + + iocb_pkt = &item->iocb; + + if (is_purls) + memcpy(iocb_pkt, &purls->payload[0], no_bytes); + else + memcpy(iocb_pkt, &purex->els_frame_payload[0], no_bytes); + buffer_copy_offset += no_bytes; + pending_bytes -= no_bytes; + --entry_count_remaining; + + if (is_purls) + ((response_t *)purls)->signature = RESPONSE_PROCESSED; + else + ((response_t *)purex)->signature = RESPONSE_PROCESSED; + wmb(); + + do { + while ((total_bytes > 0) && (entry_count_remaining > 0)) { + if (rsp_q->ring_ptr->signature == RESPONSE_PROCESSED) { + ql_dbg(ql_dbg_async, vha, 0x5084, + "Ran out of IOCBs, partial data 0x%x\n", + buffer_copy_offset); + cpu_relax(); + continue; + } + + new_pkt = (sts_cont_entry_t *)rsp_q->ring_ptr; + *pkt = new_pkt; + + if (new_pkt->entry_type != STATUS_CONT_TYPE) { + ql_log(ql_log_warn, vha, 0x507a, + "Unexpected IOCB type, partial data 0x%x\n", + buffer_copy_offset); + break; + } + + rsp_q->ring_index++; + if (rsp_q->ring_index == rsp_q->length) { + rsp_q->ring_index = 0; + rsp_q->ring_ptr = rsp_q->ring; + } else { + rsp_q->ring_ptr++; + } + no_bytes = (pending_bytes > sizeof(new_pkt->data)) ? 
+ sizeof(new_pkt->data) : pending_bytes; + if ((buffer_copy_offset + no_bytes) <= total_bytes) { + memcpy(((uint8_t *)iocb_pkt + buffer_copy_offset), + new_pkt->data, no_bytes); + buffer_copy_offset += no_bytes; + pending_bytes -= no_bytes; + --entry_count_remaining; + } else { + ql_log(ql_log_warn, vha, 0x5044, + "Attempt to copy more that we got, optimizing..%x\n", + buffer_copy_offset); + memcpy(((uint8_t *)iocb_pkt + buffer_copy_offset), + new_pkt->data, + total_bytes - buffer_copy_offset); + } + + ((response_t *)new_pkt)->signature = RESPONSE_PROCESSED; + wmb(); + } + + if (pending_bytes != 0 || entry_count_remaining != 0) { + ql_log(ql_log_fatal, vha, 0x508b, + "Dropping partial FPIN, underrun bytes = 0x%x, entry cnts 0x%x\n", + total_bytes, entry_count_remaining); + qla24xx_free_purex_item(item); + return NULL; + } + } while (entry_count_remaining > 0); + + if (byte_order) + host_to_fcp_swap((uint8_t *)&item->iocb, total_bytes); + + return item; +} + int qla2x00_is_a_vp_did(scsi_qla_host_t *vha, uint32_t rscn_entry) { @@ -958,7 +1087,7 @@ initialize_purex_header: return item; } -static void +void qla24xx_queue_purex_item(scsi_qla_host_t *vha, struct purex_item *pkt, void (*process_item)(struct scsi_qla_host *vha, struct purex_item *pkt)) @@ -3811,6 +3940,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, struct qla_hw_data *ha = vha->hw; struct purex_entry_24xx *purex_entry; struct purex_item *pure_item; + struct pt_ls4_rx_unsol *p; u16 rsp_in = 0, cur_ring_index; int is_shadow_hba; @@ -3983,7 +4113,19 @@ process_err: qla28xx_sa_update_iocb_entry(vha, rsp->req, (struct sa_update_28xx *)pkt); break; - + case PT_LS4_UNSOL: + p = (void *)pkt; + if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt, rsp_in)) { + rsp->ring_ptr = (response_t *)pkt; + rsp->ring_index = cur_ring_index; + + ql_dbg(ql_dbg_init, vha, 0x2124, + "Defer processing UNSOL LS req opcode %#x...\n", + p->payload[0]); + return; + } + qla2xxx_process_purls_iocb((void **)&pkt, 
&rsp); + break; default: /* Type Not Supported. */ ql_dbg(ql_dbg_async, vha, 0x5042, diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 9941b38eac93..1a31e877e6cb 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -12,6 +12,26 @@ #include static struct nvme_fc_port_template qla_nvme_fc_transport; +static int qla_nvme_ls_reject_iocb(struct scsi_qla_host *vha, + struct qla_qpair *qp, + struct qla_nvme_lsrjt_pt_arg *a, + bool is_xchg_terminate); + +struct qla_nvme_unsol_ctx { + struct list_head elem; + struct scsi_qla_host *vha; + struct fc_port *fcport; + struct srb *sp; + struct nvmefc_ls_rsp lsrsp; + struct nvmefc_ls_rsp *fd_rsp; + struct work_struct lsrsp_work; + struct work_struct abort_work; + __le32 exchange_address; + __le16 nport_handle; + __le16 ox_id; + int comp_status; + spinlock_t cmd_lock; +}; int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) { @@ -216,6 +236,55 @@ static void qla_nvme_sp_ls_done(srb_t *sp, int res) schedule_work(&priv->ls_work); } +static void qla_nvme_release_lsrsp_cmd_kref(struct kref *kref) +{ + struct srb *sp = container_of(kref, struct srb, cmd_kref); + struct qla_nvme_unsol_ctx *uctx = sp->priv; + struct nvmefc_ls_rsp *fd_rsp; + unsigned long flags; + + if (!uctx) { + qla2x00_rel_sp(sp); + return; + } + + spin_lock_irqsave(&uctx->cmd_lock, flags); + uctx->sp = NULL; + sp->priv = NULL; + spin_unlock_irqrestore(&uctx->cmd_lock, flags); + + fd_rsp = uctx->fd_rsp; + + list_del(&uctx->elem); + + fd_rsp->done(fd_rsp); + kfree(uctx); + qla2x00_rel_sp(sp); +} + +static void qla_nvme_lsrsp_complete(struct work_struct *work) +{ + struct qla_nvme_unsol_ctx *uctx = + container_of(work, struct qla_nvme_unsol_ctx, lsrsp_work); + + kref_put(&uctx->sp->cmd_kref, qla_nvme_release_lsrsp_cmd_kref); +} + +static void qla_nvme_sp_lsrsp_done(srb_t *sp, int res) +{ + struct qla_nvme_unsol_ctx *uctx = sp->priv; + + if (WARN_ON_ONCE(kref_read(&sp->cmd_kref) 
== 0)) + return; + + if (res) + res = -EINVAL; + + uctx->comp_status = res; + INIT_WORK(&uctx->lsrsp_work, qla_nvme_lsrsp_complete); + schedule_work(&uctx->lsrsp_work); +} + /* it assumed that QPair lock is held. */ static void qla_nvme_sp_done(srb_t *sp, int res) { @@ -288,6 +357,92 @@ out: kref_put(&sp->cmd_kref, sp->put_fn); } +static int qla_nvme_xmt_ls_rsp(struct nvme_fc_local_port *lport, + struct nvme_fc_remote_port *rport, + struct nvmefc_ls_rsp *fd_resp) +{ + struct qla_nvme_unsol_ctx *uctx = container_of(fd_resp, + struct qla_nvme_unsol_ctx, lsrsp); + struct qla_nvme_rport *qla_rport = rport->private; + fc_port_t *fcport = qla_rport->fcport; + struct scsi_qla_host *vha = uctx->vha; + struct qla_hw_data *ha = vha->hw; + struct qla_nvme_lsrjt_pt_arg a; + struct srb_iocb *nvme; + srb_t *sp; + int rval = QLA_FUNCTION_FAILED; + uint8_t cnt = 0; + + if (!fcport || fcport->deleted) + goto out; + + if (!ha->flags.fw_started) + goto out; + + /* Alloc SRB structure */ + sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); + if (!sp) + goto out; + + sp->type = SRB_NVME_LS; + sp->name = "nvme_ls"; + sp->done = qla_nvme_sp_lsrsp_done; + sp->put_fn = qla_nvme_release_lsrsp_cmd_kref; + sp->priv = (void *)uctx; + sp->unsol_rsp = 1; + uctx->sp = sp; + spin_lock_init(&uctx->cmd_lock); + nvme = &sp->u.iocb_cmd; + uctx->fd_rsp = fd_resp; + nvme->u.nvme.desc = fd_resp; + nvme->u.nvme.dir = 0; + nvme->u.nvme.dl = 0; + nvme->u.nvme.timeout_sec = 0; + nvme->u.nvme.cmd_dma = fd_resp->rspdma; + nvme->u.nvme.cmd_len = fd_resp->rsplen; + nvme->u.nvme.rsp_len = 0; + nvme->u.nvme.rsp_dma = 0; + nvme->u.nvme.exchange_address = uctx->exchange_address; + nvme->u.nvme.nport_handle = uctx->nport_handle; + nvme->u.nvme.ox_id = uctx->ox_id; + dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, + le32_to_cpu(fd_resp->rsplen), DMA_TO_DEVICE); + + ql_dbg(ql_dbg_unsol, vha, 0x2122, + "Unsol lsreq portid=%06x %8phC exchange_address 0x%x ox_id 0x%x hdl 0x%x\n", + fcport->d_id.b24, 
fcport->port_name, uctx->exchange_address, + uctx->ox_id, uctx->nport_handle); +retry: + rval = qla2x00_start_sp(sp); + switch (rval) { + case QLA_SUCCESS: + break; + case EAGAIN: + msleep(PURLS_MSLEEP_INTERVAL); + cnt++; + if (cnt < PURLS_RETRY_COUNT) + goto retry; + + fallthrough; + default: + ql_dbg(ql_log_warn, vha, 0x2123, + "Failed to xmit Unsol ls response = %d\n", rval); + rval = -EIO; + qla2x00_rel_sp(sp); + goto out; + } + + return 0; +out: + memset((void *)&a, 0, sizeof(a)); + a.vp_idx = vha->vp_idx; + a.nport_handle = uctx->nport_handle; + a.xchg_address = uctx->exchange_address; + qla_nvme_ls_reject_iocb(vha, ha->base_qpair, &a, true); + kfree(uctx); + return rval; +} + static void qla_nvme_ls_abort(struct nvme_fc_local_port *lport, struct nvme_fc_remote_port *rport, struct nvmefc_ls_req *fd) { @@ -355,7 +510,7 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, nvme->u.nvme.timeout_sec = fd->timeout; nvme->u.nvme.cmd_dma = fd->rqstdma; dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, - fd->rqstlen, DMA_TO_DEVICE); + le32_to_cpu(fd->rqstlen), DMA_TO_DEVICE); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { @@ -720,6 +875,7 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = { .ls_abort = qla_nvme_ls_abort, .fcp_io = qla_nvme_post_cmd, .fcp_abort = qla_nvme_fcp_abort, + .xmt_ls_rsp = qla_nvme_xmt_ls_rsp, .map_queues = qla_nvme_map_queues, .max_hw_queues = DEF_NVME_HW_QUEUES, .max_sgl_segments = 1024, @@ -924,3 +1080,246 @@ inline void qla_wait_nvme_release_cmd_kref(srb_t *orig_sp) return; kref_put(&orig_sp->cmd_kref, orig_sp->put_fn); } + +static void qla_nvme_fc_format_rjt(void *buf, u8 ls_cmd, u8 reason, + u8 explanation, u8 vendor) +{ + struct fcnvme_ls_rjt *rjt = buf; + + rjt->w0.ls_cmd = FCNVME_LSDESC_RQST; + rjt->desc_list_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)); + rjt->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); + rjt->rqst.desc_len = + fcnvme_lsdesc_len(sizeof(struct 
fcnvme_lsdesc_rqst)); + rjt->rqst.w0.ls_cmd = ls_cmd; + rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); + rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); + rjt->rjt.reason_code = reason; + rjt->rjt.reason_explanation = explanation; + rjt->rjt.vendor = vendor; +} + +static void qla_nvme_lsrjt_pt_iocb(struct scsi_qla_host *vha, + struct pt_ls4_request *lsrjt_iocb, + struct qla_nvme_lsrjt_pt_arg *a) +{ + lsrjt_iocb->entry_type = PT_LS4_REQUEST; + lsrjt_iocb->entry_count = 1; + lsrjt_iocb->sys_define = 0; + lsrjt_iocb->entry_status = 0; + lsrjt_iocb->handle = QLA_SKIP_HANDLE; + lsrjt_iocb->nport_handle = a->nport_handle; + lsrjt_iocb->exchange_address = a->xchg_address; + lsrjt_iocb->vp_index = a->vp_idx; + + lsrjt_iocb->control_flags = cpu_to_le16(a->control_flags); + + put_unaligned_le64(a->tx_addr, &lsrjt_iocb->dsd[0].address); + lsrjt_iocb->dsd[0].length = cpu_to_le32(a->tx_byte_count); + lsrjt_iocb->tx_dseg_count = cpu_to_le16(1); + lsrjt_iocb->tx_byte_count = cpu_to_le32(a->tx_byte_count); + + put_unaligned_le64(a->rx_addr, &lsrjt_iocb->dsd[1].address); + lsrjt_iocb->dsd[1].length = 0; + lsrjt_iocb->rx_dseg_count = 0; + lsrjt_iocb->rx_byte_count = 0; +} + +static int +qla_nvme_ls_reject_iocb(struct scsi_qla_host *vha, struct qla_qpair *qp, + struct qla_nvme_lsrjt_pt_arg *a, bool is_xchg_terminate) +{ + struct pt_ls4_request *lsrjt_iocb; + + lsrjt_iocb = __qla2x00_alloc_iocbs(qp, NULL); + if (!lsrjt_iocb) { + ql_log(ql_log_warn, vha, 0x210e, + "qla2x00_alloc_iocbs failed.\n"); + return QLA_FUNCTION_FAILED; + } + + if (!is_xchg_terminate) { + qla_nvme_fc_format_rjt((void *)vha->hw->lsrjt.c, a->opcode, + a->reason, a->explanation, 0); + + a->tx_byte_count = sizeof(struct fcnvme_ls_rjt); + a->tx_addr = vha->hw->lsrjt.cdma; + a->control_flags = CF_LS4_RESPONDER << CF_LS4_SHIFT; + + ql_dbg(ql_dbg_unsol, vha, 0x211f, + "Sending nvme fc ls reject ox_id %04x op %04x\n", + a->ox_id, a->opcode); + ql_dump_buffer(ql_dbg_unsol + ql_dbg_verbose, 
vha, 0x210f, + vha->hw->lsrjt.c, sizeof(*vha->hw->lsrjt.c)); + } else { + a->tx_byte_count = 0; + a->control_flags = CF_LS4_RESPONDER_TERM << CF_LS4_SHIFT; + ql_dbg(ql_dbg_unsol, vha, 0x2110, + "Terminate nvme ls xchg 0x%x\n", a->xchg_address); + } + + qla_nvme_lsrjt_pt_iocb(vha, lsrjt_iocb, a); + /* flush iocb to mem before notifying hw doorbell */ + wmb(); + qla2x00_start_iocbs(vha, qp->req); + return 0; +} + +/* + * qla2xxx_process_purls_pkt() - Pass-up Unsolicited + * Received FC-NVMe Link Service pkt to nvme_fc_rcv_ls_req(). + * LLDD need to provide memory for response buffer, which + * will be used to reference the exchange corresponding + * to the LS when issuing an ls response. LLDD will have to free + * response buffer in lport->ops->xmt_ls_rsp(). + * + * @vha: SCSI qla host + * @item: ptr to purex_item + */ +static void +qla2xxx_process_purls_pkt(struct scsi_qla_host *vha, struct purex_item *item) +{ + struct qla_nvme_unsol_ctx *uctx = item->purls_context; + fc_port_t *fcport = uctx->fcport; + struct qla_nvme_lsrjt_pt_arg a; + int ret; + + ret = nvme_fc_rcv_ls_req(fcport->nvme_remote_port, &uctx->lsrsp, + &item->iocb, item->size); + if (ret) { + ql_dbg(ql_dbg_unsol, vha, 0x2125, "NVMe tranport ls_req failed\n"); + memset((void *)&a, 0, sizeof(a)); + a.vp_idx = vha->vp_idx; + a.nport_handle = uctx->nport_handle; + a.xchg_address = uctx->exchange_address; + qla_nvme_ls_reject_iocb(vha, vha->hw->base_qpair, &a, true); + list_del(&uctx->elem); + kfree(uctx); + } +} + +static scsi_qla_host_t * +qla2xxx_get_vha_from_vp_idx(struct qla_hw_data *ha, uint16_t vp_index) +{ + scsi_qla_host_t *base_vha, *vha, *tvp; + unsigned long flags; + + base_vha = pci_get_drvdata(ha->pdev); + + if (!vp_index && !ha->num_vhosts) + return base_vha; + + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry_safe(vha, tvp, &ha->vp_list, list) { + if (vha->vp_idx == vp_index) { + spin_unlock_irqrestore(&ha->vport_slock, flags); + return vha; + } + } + 
spin_unlock_irqrestore(&ha->vport_slock, flags); + + return NULL; +} + +void qla2xxx_process_purls_iocb(void **pkt, struct rsp_que **rsp) +{ + struct nvme_fc_remote_port *rport; + struct qla_nvme_rport *qla_rport; + struct qla_nvme_lsrjt_pt_arg a; + struct pt_ls4_rx_unsol *p = *pkt; + struct qla_nvme_unsol_ctx *uctx; + struct rsp_que *rsp_q = *rsp; + struct qla_hw_data *ha; + scsi_qla_host_t *vha; + fc_port_t *fcport = NULL; + struct purex_item *item; + port_id_t d_id = {0}; + port_id_t id = {0}; + u8 *opcode; + bool xmt_reject = false; + + ha = rsp_q->hw; + + vha = qla2xxx_get_vha_from_vp_idx(ha, p->vp_index); + if (!vha) { + ql_log(ql_log_warn, NULL, 0x2110, "Invalid vp index %d\n", p->vp_index); + WARN_ON_ONCE(1); + return; + } + + memset((void *)&a, 0, sizeof(a)); + opcode = (u8 *)&p->payload[0]; + a.opcode = opcode[3]; + a.vp_idx = p->vp_index; + a.nport_handle = p->nport_handle; + a.ox_id = p->ox_id; + a.xchg_address = p->exchange_address; + + id.b.domain = p->s_id.domain; + id.b.area = p->s_id.area; + id.b.al_pa = p->s_id.al_pa; + d_id.b.domain = p->d_id[2]; + d_id.b.area = p->d_id[1]; + d_id.b.al_pa = p->d_id[0]; + + fcport = qla2x00_find_fcport_by_nportid(vha, &id, 0); + if (!fcport) { + ql_dbg(ql_dbg_unsol, vha, 0x211e, + "Failed to find sid=%06x did=%06x\n", + id.b24, d_id.b24); + a.reason = FCNVME_RJT_RC_INV_ASSOC; + a.explanation = FCNVME_RJT_EXP_NONE; + xmt_reject = true; + goto out; + } + rport = fcport->nvme_remote_port; + qla_rport = rport->private; + + item = qla27xx_copy_multiple_pkt(vha, pkt, rsp, true, false); + if (!item) { + a.reason = FCNVME_RJT_RC_LOGIC; + a.explanation = FCNVME_RJT_EXP_NONE; + xmt_reject = true; + goto out; + } + + uctx = kzalloc(sizeof(*uctx), GFP_ATOMIC); + if (!uctx) { + ql_log(ql_log_info, vha, 0x2126, "Failed allocate memory\n"); + a.reason = FCNVME_RJT_RC_LOGIC; + a.explanation = FCNVME_RJT_EXP_NONE; + xmt_reject = true; + kfree(item); + goto out; + } + + uctx->vha = vha; + uctx->fcport = fcport; + 
uctx->exchange_address = p->exchange_address; + uctx->nport_handle = p->nport_handle; + uctx->ox_id = p->ox_id; + qla_rport->uctx = uctx; + INIT_LIST_HEAD(&uctx->elem); + list_add_tail(&uctx->elem, &fcport->unsol_ctx_head); + item->purls_context = (void *)uctx; + + ql_dbg(ql_dbg_unsol, vha, 0x2121, + "PURLS OP[%01x] size %d xchg addr 0x%x portid %06x\n", + item->iocb.iocb[3], item->size, uctx->exchange_address, + fcport->d_id.b24); + /* +48 0 1 2 3 4 5 6 7 8 9 A B C D E F + * ----- ----------------------------------------------- + * 0000: 00 00 00 05 28 00 00 00 07 00 00 00 08 00 00 00 + * 0010: ab ec 0f cc 00 00 8d 7d 05 00 00 00 10 00 00 00 + * 0020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + */ + ql_dump_buffer(ql_dbg_unsol + ql_dbg_verbose, vha, 0x2120, + &item->iocb, item->size); + + qla24xx_queue_purex_item(vha, item, qla2xxx_process_purls_pkt); +out: + if (xmt_reject) { + qla_nvme_ls_reject_iocb(vha, (*rsp)->qpair, &a, false); + __qla_consume_iocb(vha, pkt, rsp); + } +} diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h index d299478371b2..a253ac55171b 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.h +++ b/drivers/scsi/qla2xxx/qla_nvme.h @@ -21,6 +21,7 @@ #define Q2T_NVME_NUM_TAGS 2048 #define QLA_MAX_FC_SEGMENTS 64 +struct qla_nvme_unsol_ctx; struct scsi_qla_host; struct qla_hw_data; struct req_que; @@ -37,6 +38,7 @@ struct nvme_private { struct qla_nvme_rport { struct fc_port *fcport; + struct qla_nvme_unsol_ctx *uctx; }; #define COMMAND_NVME 0x88 /* Command Type FC-NVMe IOCB */ @@ -75,6 +77,9 @@ struct cmd_nvme { struct dsd64 nvme_dsd; }; +#define PURLS_MSLEEP_INTERVAL 1 +#define PURLS_RETRY_COUNT 5 + #define PT_LS4_REQUEST 0x89 /* Link Service pass-through IOCB (request) */ struct pt_ls4_request { uint8_t entry_type; @@ -118,21 +123,19 @@ struct pt_ls4_rx_unsol { __le32 exchange_address; uint8_t d_id[3]; uint8_t r_ctl; - be_id_t s_id; + le_id_t s_id; uint8_t cs_ctl; uint8_t f_ctl[3]; uint8_t type; __le16 seq_cnt; uint8_t 
df_ctl; uint8_t seq_id; - __le16 rx_id; - __le16 ox_id; - __le32 param; - __le32 desc0; + __le16 rx_id; + __le16 ox_id; + __le32 desc0; #define PT_LS4_PAYLOAD_OFFSET 0x2c #define PT_LS4_FIRST_PACKET_LEN 20 - __le32 desc_len; - __le32 payload[3]; + __le32 payload[5]; }; /* diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index f31d0ead8919..aa492489a4b8 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4457,8 +4457,9 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, ha->elsrej.size = sizeof(struct fc_els_ls_rjt) + 16; ha->elsrej.c = dma_alloc_coherent(&ha->pdev->dev, - ha->elsrej.size, &ha->elsrej.cdma, GFP_KERNEL); - + ha->elsrej.size, + &ha->elsrej.cdma, + GFP_KERNEL); if (!ha->elsrej.c) { ql_dbg_pci(ql_dbg_init, ha->pdev, 0xffff, "Alloc failed for els reject cmd.\n"); @@ -4467,8 +4468,21 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, ha->elsrej.c->er_cmd = ELS_LS_RJT; ha->elsrej.c->er_reason = ELS_RJT_LOGIC; ha->elsrej.c->er_explan = ELS_EXPL_UNAB_DATA; + + ha->lsrjt.size = sizeof(struct fcnvme_ls_rjt); + ha->lsrjt.c = dma_alloc_coherent(&ha->pdev->dev, ha->lsrjt.size, + &ha->lsrjt.cdma, GFP_KERNEL); + if (!ha->lsrjt.c) { + ql_dbg_pci(ql_dbg_init, ha->pdev, 0xffff, + "Alloc failed for nvme fc reject cmd.\n"); + goto fail_lsrjt; + } + return 0; +fail_lsrjt: + dma_free_coherent(&ha->pdev->dev, ha->elsrej.size, + ha->elsrej.c, ha->elsrej.cdma); fail_elsrej: dma_pool_destroy(ha->purex_dma_pool); fail_flt: @@ -4998,6 +5012,12 @@ qla2x00_mem_free(struct qla_hw_data *ha) ha->elsrej.c = NULL; } + if (ha->lsrjt.c) { + dma_free_coherent(&ha->pdev->dev, ha->lsrjt.size, ha->lsrjt.c, + ha->lsrjt.cdma); + ha->lsrjt.c = NULL; + } + ha->init_cb = NULL; ha->init_cb_dma = 0; diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 4109f1bd6128..f6ef8cf5d774 100644 --- a/include/linux/nvme-fc-driver.h +++ 
b/include/linux/nvme-fc-driver.h @@ -53,10 +53,10 @@ struct nvmefc_ls_req { void *rqstaddr; dma_addr_t rqstdma; - u32 rqstlen; + __le32 rqstlen; void *rspaddr; dma_addr_t rspdma; - u32 rsplen; + __le32 rsplen; u32 timeout; void *private; @@ -120,7 +120,7 @@ struct nvmefc_ls_req { struct nvmefc_ls_rsp { void *rspbuf; dma_addr_t rspdma; - u16 rsplen; + __le32 rsplen; void (*done)(struct nvmefc_ls_rsp *rsp); void *nvme_fc_private; /* LLDD is not to access !! */ -- cgit v1.2.3 From 08b8a0440eeec83f8330349f829908858fd52d31 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 14 Mar 2022 15:47:18 -0400 Subject: libceph: add spinlock around osd->o_requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a later patch, we're going to need to search for a request in the rbtree, but taking the o_mutex is inconvenient as we already hold the con mutex at the point where we need it. Add a new spinlock that we take when inserting and erasing entries from the o_requests tree. Search of the rbtree can be done with either the mutex or the spinlock, but insertion and removal requires both. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 8 +++++++- net/ceph/osd_client.c | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fb6be72104df..92addef18738 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -29,7 +29,12 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); #define CEPH_HOMELESS_OSD -1 -/* a given osd we're communicating with */ +/* + * A given osd we're communicating with. + * + * Note that the o_requests tree can be searched while holding the "lock" mutex + * or the "o_requests_lock" spinlock. Insertion or removal requires both! 
+ */ struct ceph_osd { refcount_t o_ref; struct ceph_osd_client *o_osdc; @@ -37,6 +42,7 @@ struct ceph_osd { int o_incarnation; struct rb_node o_node; struct ceph_connection o_con; + spinlock_t o_requests_lock; struct rb_root o_requests; struct rb_root o_linger_requests; struct rb_root o_backoff_mappings; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 658a6f2320cf..6d7a430bfed8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1177,6 +1177,7 @@ static void osd_init(struct ceph_osd *osd) { refcount_set(&osd->o_ref, 1); RB_CLEAR_NODE(&osd->o_node); + spin_lock_init(&osd->o_requests_lock); osd->o_requests = RB_ROOT; osd->o_linger_requests = RB_ROOT; osd->o_backoff_mappings = RB_ROOT; @@ -1406,7 +1407,9 @@ static void link_request(struct ceph_osd *osd, struct ceph_osd_request *req) atomic_inc(&osd->o_osdc->num_homeless); get_osd(osd); + spin_lock(&osd->o_requests_lock); insert_request(&osd->o_requests, req); + spin_unlock(&osd->o_requests_lock); req->r_osd = osd; } @@ -1418,7 +1421,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req) req, req->r_tid); req->r_osd = NULL; + spin_lock(&osd->o_requests_lock); erase_request(&osd->o_requests, req); + spin_unlock(&osd->o_requests_lock); put_osd(osd); if (!osd_homeless(osd)) -- cgit v1.2.3 From a679e50f728648f7b2f3b349e082448abd388038 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Mar 2022 15:23:00 -0400 Subject: libceph: define struct ceph_sparse_extent and add some helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the OSD sends back a sparse read reply, it contains an array of these structures. Define the structure and add a couple of helpers for dealing with them. Also add a place in struct ceph_osd_req_op to store the extent buffer, and code to free it if it's populated when the req is torn down. 
Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 43 ++++++++++++++++++++++++++++++++++++++++- net/ceph/osd_client.c | 13 +++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 92addef18738..05da1e755b7b 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -29,6 +29,17 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); #define CEPH_HOMELESS_OSD -1 +/* + * A single extent in a SPARSE_READ reply. + * + * Note that these come from the OSD as little-endian values. On BE arches, + * we convert them in-place after receipt. + */ +struct ceph_sparse_extent { + u64 off; + u64 len; +} __packed; + /* * A given osd we're communicating with. * @@ -104,6 +115,8 @@ struct ceph_osd_req_op { u64 offset, length; u64 truncate_size; u32 truncate_seq; + int sparse_ext_cnt; + struct ceph_sparse_extent *sparse_ext; struct ceph_osd_data osd_data; } extent; struct { @@ -510,6 +523,20 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, u32 truncate_seq, u64 truncate_size, bool use_mempool); +int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt); + +/* + * How big an extent array should we preallocate for a sparse read? This is + * just a starting value. If we get more than this back from the OSD, the + * receiver will reallocate. 
+ */ +#define CEPH_SPARSE_EXT_ARRAY_INITIAL 16 + +static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op) +{ + return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL); +} + extern void ceph_osdc_get_request(struct ceph_osd_request *req); extern void ceph_osdc_put_request(struct ceph_osd_request *req); @@ -564,5 +591,19 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, struct ceph_object_locator *oloc, struct ceph_watch_item **watchers, u32 *num_watchers); -#endif +/* Find offset into the buffer of the end of the extent map */ +static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op) +{ + struct ceph_sparse_extent *ext; + + /* No extents? No data */ + if (op->extent.sparse_ext_cnt == 0) + return 0; + + ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1]; + + return ext->off + ext->len - op->extent.offset; +} + +#endif diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6d7a430bfed8..3e03ae68722c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -378,6 +378,7 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, case CEPH_OSD_OP_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: + kfree(op->extent.sparse_ext); ceph_osd_data_release(&op->extent.osd_data); break; case CEPH_OSD_OP_CALL: @@ -1120,6 +1121,18 @@ fail: } EXPORT_SYMBOL(ceph_osdc_new_request); +int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) +{ + op->extent.sparse_ext_cnt = cnt; + op->extent.sparse_ext = kmalloc_array(cnt, + sizeof(*op->extent.sparse_ext), + GFP_NOFS); + if (!op->extent.sparse_ext) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(__ceph_alloc_sparse_ext_map); + /* * We keep osd requests in an rbtree, sorted by ->r_tid. 
 */ -- cgit v1.2.3 From ec3bc567eac12c557a2b99bd0b34b5dff12cab23 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 25 Jan 2022 08:26:31 -0500 Subject: libceph: new sparse_read op, support sparse reads on msgr2 crc codepath MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for a new sparse_read ceph_connection operation. The idea is that the client driver can define this operation and use it to do special handling for incoming reads. The alloc_msg routine will look at the request and determine whether the reply is expected to be sparse. If it is, then we'll dispatch to a different set of state machine states that will repeatedly call the driver's sparse_read op to get length and placement info for reading the extent map, and the extents themselves. This necessitates adding some new fields to some other structs: - The msg gets a new bool to track whether it's a sparse_read request. - A new field is added to the cursor to track the amount remaining in the current extent. This is used to cap the read from the socket into the msg_data. - Handling a revoke with all of this is particularly difficult, so I've added a new data_len_remain field to the v2 connection info, and then use that to skip that much on a revoke. We may want to expand the use of that to the normal read path as well, just for consistency's sake. 
Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 28 +++++++ net/ceph/messenger.c | 1 + net/ceph/messenger_v2.c | 167 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 187 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 99c1726be6ee..8a6938fa324e 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -17,6 +17,7 @@ struct ceph_msg; struct ceph_connection; +struct ceph_msg_data_cursor; /* * Ceph defines these callbacks for handling connection events. @@ -70,6 +71,30 @@ struct ceph_connection_operations { int used_proto, int result, const int *allowed_protos, int proto_cnt, const int *allowed_modes, int mode_cnt); + + /** + * sparse_read: read sparse data + * @con: connection we're reading from + * @cursor: data cursor for reading extents + * @buf: optional buffer to read into + * + * This should be called more than once, each time setting up to + * receive an extent into the current cursor position, and zeroing + * the holes between them. + * + * Returns amount of data to be read (in bytes), 0 if reading is + * complete, or -errno if there was an error. + * + * If @buf is set on a >0 return, then the data should be read into + * the provided buffer. Otherwise, it should be read into the cursor. + * + * The sparse read operation is expected to initialize the cursor + * with a length covering up to the end of the last extent. 
+ */ + int (*sparse_read)(struct ceph_connection *con, + struct ceph_msg_data_cursor *cursor, + char **buf); + }; /* use format string %s%lld */ @@ -207,6 +232,7 @@ struct ceph_msg_data_cursor { struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ + int sr_resid; /* residual sparse_read len */ bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK @@ -251,6 +277,7 @@ struct ceph_msg { struct kref kref; bool more_to_follow; bool needs_out_seq; + bool sparse_read; int front_alloc_len; struct ceph_msgpool *pool; @@ -395,6 +422,7 @@ struct ceph_connection_v2_info { void *conn_bufs[16]; int conn_buf_cnt; + int data_len_remain; struct kvec in_sign_kvecs[8]; struct kvec out_sign_kvecs[8]; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5eb4898cccd4..2eb10d7518e8 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1013,6 +1013,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, cursor->total_resid = length; cursor->data = msg->data; + cursor->sr_resid = 0; __ceph_msg_data_cursor_init(cursor); } diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 1df1d29dee92..17c9a858bfbd 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -52,14 +52,16 @@ #define FRAME_LATE_STATUS_COMPLETE 0xe #define FRAME_LATE_STATUS_ABORTED_MASK 0xf -#define IN_S_HANDLE_PREAMBLE 1 -#define IN_S_HANDLE_CONTROL 2 -#define IN_S_HANDLE_CONTROL_REMAINDER 3 -#define IN_S_PREPARE_READ_DATA 4 -#define IN_S_PREPARE_READ_DATA_CONT 5 -#define IN_S_PREPARE_READ_ENC_PAGE 6 -#define IN_S_HANDLE_EPILOGUE 7 -#define IN_S_FINISH_SKIP 8 +#define IN_S_HANDLE_PREAMBLE 1 +#define IN_S_HANDLE_CONTROL 2 +#define IN_S_HANDLE_CONTROL_REMAINDER 3 +#define IN_S_PREPARE_READ_DATA 4 +#define IN_S_PREPARE_READ_DATA_CONT 5 +#define IN_S_PREPARE_READ_ENC_PAGE 6 +#define IN_S_PREPARE_SPARSE_DATA 7 +#define IN_S_PREPARE_SPARSE_DATA_CONT 8 +#define IN_S_HANDLE_EPILOGUE 9 +#define IN_S_FINISH_SKIP 10 
#define OUT_S_QUEUE_DATA 1 #define OUT_S_QUEUE_DATA_CONT 2 @@ -1825,6 +1827,123 @@ static void prepare_read_data_cont(struct ceph_connection *con) con->v2.in_state = IN_S_HANDLE_EPILOGUE; } +static int prepare_sparse_read_cont(struct ceph_connection *con) +{ + int ret; + struct bio_vec bv; + char *buf = NULL; + struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor; + + WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT); + + if (iov_iter_is_bvec(&con->v2.in_iter)) { + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + con->in_data_crc = crc32c(con->in_data_crc, + page_address(con->bounce_page), + con->v2.in_bvec.bv_len); + get_bvec_at(cursor, &bv); + memcpy_to_page(bv.bv_page, bv.bv_offset, + page_address(con->bounce_page), + con->v2.in_bvec.bv_len); + } else { + con->in_data_crc = ceph_crc32c_page(con->in_data_crc, + con->v2.in_bvec.bv_page, + con->v2.in_bvec.bv_offset, + con->v2.in_bvec.bv_len); + } + + ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len); + cursor->sr_resid -= con->v2.in_bvec.bv_len; + dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__, + con->v2.in_bvec.bv_len, cursor->sr_resid); + WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid); + if (cursor->sr_resid) { + get_bvec_at(cursor, &bv); + if (bv.bv_len > cursor->sr_resid) + bv.bv_len = cursor->sr_resid; + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + bv.bv_page = con->bounce_page; + bv.bv_offset = 0; + } + set_in_bvec(con, &bv); + con->v2.data_len_remain -= bv.bv_len; + return 0; + } + } else if (iov_iter_is_kvec(&con->v2.in_iter)) { + /* On first call, we have no kvec so don't compute crc */ + if (con->v2.in_kvec_cnt) { + WARN_ON_ONCE(con->v2.in_kvec_cnt > 1); + con->in_data_crc = crc32c(con->in_data_crc, + con->v2.in_kvecs[0].iov_base, + con->v2.in_kvecs[0].iov_len); + } + } else { + return -EIO; + } + + /* get next extent */ + ret = con->ops->sparse_read(con, cursor, &buf); + if (ret <= 0) { + if (ret < 0) + return ret; + + reset_in_kvecs(con); + add_in_kvec(con, 
con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); + con->v2.in_state = IN_S_HANDLE_EPILOGUE; + return 0; + } + + if (buf) { + /* receive into buffer */ + reset_in_kvecs(con); + add_in_kvec(con, buf, ret); + con->v2.data_len_remain -= ret; + return 0; + } + + if (ret > cursor->total_resid) { + pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n", + __func__, ret, cursor->total_resid, cursor->resid); + return -EIO; + } + get_bvec_at(cursor, &bv); + if (bv.bv_len > cursor->sr_resid) + bv.bv_len = cursor->sr_resid; + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + if (unlikely(!con->bounce_page)) { + con->bounce_page = alloc_page(GFP_NOIO); + if (!con->bounce_page) { + pr_err("failed to allocate bounce page\n"); + return -ENOMEM; + } + } + + bv.bv_page = con->bounce_page; + bv.bv_offset = 0; + } + set_in_bvec(con, &bv); + con->v2.data_len_remain -= ret; + return ret; +} + +static int prepare_sparse_read_data(struct ceph_connection *con) +{ + struct ceph_msg *msg = con->in_msg; + + dout("%s: starting sparse read\n", __func__); + + if (WARN_ON_ONCE(!con->ops->sparse_read)) + return -EOPNOTSUPP; + + if (!con_secure(con)) + con->in_data_crc = -1; + + reset_in_kvecs(con); + con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT; + con->v2.data_len_remain = data_len(msg); + return prepare_sparse_read_cont(con); +} + static int prepare_read_tail_plain(struct ceph_connection *con) { struct ceph_msg *msg = con->in_msg; @@ -1845,7 +1964,10 @@ static int prepare_read_tail_plain(struct ceph_connection *con) } if (data_len(msg)) { - con->v2.in_state = IN_S_PREPARE_READ_DATA; + if (msg->sparse_read) + con->v2.in_state = IN_S_PREPARE_SPARSE_DATA; + else + con->v2.in_state = IN_S_PREPARE_READ_DATA; } else { add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); con->v2.in_state = IN_S_HANDLE_EPILOGUE; @@ -2898,6 +3020,12 @@ static int populate_in_iter(struct ceph_connection *con) prepare_read_enc_page(con); ret = 0; break; + case IN_S_PREPARE_SPARSE_DATA: + ret = 
prepare_sparse_read_data(con); + break; + case IN_S_PREPARE_SPARSE_DATA_CONT: + ret = prepare_sparse_read_cont(con); + break; case IN_S_HANDLE_EPILOGUE: ret = handle_epilogue(con); break; @@ -3489,6 +3617,23 @@ static void revoke_at_prepare_read_enc_page(struct ceph_connection *con) con->v2.in_state = IN_S_FINISH_SKIP; } +static void revoke_at_prepare_sparse_data(struct ceph_connection *con) +{ + int resid; /* current piece of data */ + int remaining; + + WARN_ON(con_secure(con)); + WARN_ON(!data_len(con->in_msg)); + WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); + resid = iov_iter_count(&con->v2.in_iter); + dout("%s con %p resid %d\n", __func__, con, resid); + + remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain; + con->v2.in_iter.count -= resid; + set_in_skip(con, resid + remaining); + con->v2.in_state = IN_S_FINISH_SKIP; +} + static void revoke_at_handle_epilogue(struct ceph_connection *con) { int resid; @@ -3505,6 +3650,7 @@ static void revoke_at_handle_epilogue(struct ceph_connection *con) void ceph_con_v2_revoke_incoming(struct ceph_connection *con) { switch (con->v2.in_state) { + case IN_S_PREPARE_SPARSE_DATA: case IN_S_PREPARE_READ_DATA: revoke_at_prepare_read_data(con); break; @@ -3514,6 +3660,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con) case IN_S_PREPARE_READ_ENC_PAGE: revoke_at_prepare_read_enc_page(con); break; + case IN_S_PREPARE_SPARSE_DATA_CONT: + revoke_at_prepare_sparse_data(con); + break; case IN_S_HANDLE_EPILOGUE: revoke_at_handle_epilogue(con); break; -- cgit v1.2.3 From d396f89db39a2f259e2125ca43b4c31bb65afcad Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 24 Mar 2022 13:33:06 -0400 Subject: libceph: add sparse read support to msgr1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 2 new fields to ceph_connection_v1_info to track the necessary info in sparse reads. Skip initializing the cursor for a sparse read. 
Break out read_partial_message_section into a wrapper around a new read_partial_message_chunk function that doesn't zero out the crc first. Add new helper functions to drive receiving into the destinations provided by the sparse_read state machine. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 ++ net/ceph/messenger_v1.c | 98 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 94 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 8a6938fa324e..9fd7255172ad 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -336,6 +336,10 @@ struct ceph_connection_v1_info { int in_base_pos; /* bytes read */ + /* sparse reads */ + struct kvec in_sr_kvec; /* current location to receive into */ + u64 in_sr_len; /* amount of data in this extent */ + /* message in temps */ u8 in_tag; /* protocol control byte */ struct ceph_msg_header in_hdr; diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 3d57bb48a2b4..f9a50d7f0d20 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -159,9 +159,9 @@ static size_t sizeof_footer(struct ceph_connection *con) static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { - /* Initialize data cursor */ - - ceph_msg_data_cursor_init(&msg->cursor, msg, data_len); + /* Initialize data cursor if it's not a sparse read */ + if (!msg->sparse_read) + ceph_msg_data_cursor_init(&msg->cursor, msg, data_len); } /* @@ -960,9 +960,9 @@ static void process_ack(struct ceph_connection *con) prepare_read_tag(con); } -static int read_partial_message_section(struct ceph_connection *con, - struct kvec *section, - unsigned int sec_len, u32 *crc) +static int read_partial_message_chunk(struct ceph_connection *con, + struct kvec *section, + unsigned int sec_len, u32 *crc) 
{ int ret, left; @@ -978,11 +978,91 @@ static int read_partial_message_section(struct ceph_connection *con, section->iov_len += ret; } if (section->iov_len == sec_len) - *crc = crc32c(0, section->iov_base, section->iov_len); + *crc = crc32c(*crc, section->iov_base, section->iov_len); return 1; } +static inline int read_partial_message_section(struct ceph_connection *con, + struct kvec *section, + unsigned int sec_len, u32 *crc) +{ + *crc = 0; + return read_partial_message_chunk(con, section, sec_len, crc); +} + +static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc) +{ + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; + bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); + + if (do_bounce && unlikely(!con->bounce_page)) { + con->bounce_page = alloc_page(GFP_NOIO); + if (!con->bounce_page) { + pr_err("failed to allocate bounce page\n"); + return -ENOMEM; + } + } + + while (cursor->sr_resid > 0) { + struct page *page, *rpage; + size_t off, len; + int ret; + + page = ceph_msg_data_next(cursor, &off, &len); + rpage = do_bounce ? 
con->bounce_page : page; + + /* clamp to what remains in extent */ + len = min_t(int, len, cursor->sr_resid); + ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len); + if (ret <= 0) + return ret; + *crc = ceph_crc32c_page(*crc, rpage, off, ret); + ceph_msg_data_advance(cursor, (size_t)ret); + cursor->sr_resid -= ret; + if (do_bounce) + memcpy_page(page, off, rpage, off, ret); + } + return 1; +} + +static int read_sparse_msg_data(struct ceph_connection *con) +{ + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; + bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); + u32 crc = 0; + int ret = 1; + + if (do_datacrc) + crc = con->in_data_crc; + + do { + if (con->v1.in_sr_kvec.iov_base) + ret = read_partial_message_chunk(con, + &con->v1.in_sr_kvec, + con->v1.in_sr_len, + &crc); + else if (cursor->sr_resid > 0) + ret = read_sparse_msg_extent(con, &crc); + + if (ret <= 0) { + if (do_datacrc) + con->in_data_crc = crc; + return ret; + } + + memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); + ret = con->ops->sparse_read(con, cursor, + (char **)&con->v1.in_sr_kvec.iov_base); + con->v1.in_sr_len = ret; + } while (ret > 0); + + if (do_datacrc) + con->in_data_crc = crc; + + return ret < 0 ? 
ret : 1; /* must return > 0 to indicate success */ +} + static int read_partial_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; @@ -1173,7 +1253,9 @@ static int read_partial_message(struct ceph_connection *con) if (!m->num_data_items) return -EIO; - if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) + if (m->sparse_read) + ret = read_sparse_msg_data(con); + else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) ret = read_partial_msg_data_bounce(con); else ret = read_partial_msg_data(con); -- cgit v1.2.3 From f628d799972799023d32c2542bb2639eb8c4f84e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Feb 2022 11:38:02 -0500 Subject: libceph: add sparse read support to OSD client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have get_reply check for the presence of sparse read ops in the request and set the sparse_read boolean in the msg. That will cue the messenger layer to use the sparse read codepath instead of the normal data receive. Add a new sparse_read operation for the OSD client, driven by its own state machine. The messenger will repeatedly call the sparse_read operation, and it will pass back the necessary info to set up to read the next extent of data, while zero-filling the sparse regions. The state machine will stop at the end of the last extent, and will attach the extent map buffer to the ceph_osd_req_op so that the caller can use it.
Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 32 +++++ net/ceph/osd_client.c | 257 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 285 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 05da1e755b7b..bfa4813590da 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -40,6 +40,36 @@ struct ceph_sparse_extent { u64 len; } __packed; +/* Sparse read state machine state values */ +enum ceph_sparse_read_state { + CEPH_SPARSE_READ_HDR = 0, + CEPH_SPARSE_READ_EXTENTS, + CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA, +}; + +/* + * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of + * 64-bit offset/length pairs, and then all of the actual file data + * concatenated after it (sans holes). + * + * Unfortunately, we don't know how long the extent array is until we've + * started reading the data section of the reply. The caller should send down + * a destination buffer for the array, but we'll alloc one if it's too small + * or if the caller doesn't. + */ +struct ceph_sparse_read { + enum ceph_sparse_read_state sr_state; /* state machine state */ + u64 sr_req_off; /* orig request offset */ + u64 sr_req_len; /* orig request length */ + u64 sr_pos; /* current pos in buffer */ + int sr_index; /* current extent index */ + __le32 sr_datalen; /* length of actual data */ + u32 sr_count; /* extent count in reply */ + int sr_ext_len; /* length of extent array */ + struct ceph_sparse_extent *sr_extent; /* extent array */ +}; + /* * A given osd we're communicating with. 
* @@ -48,6 +78,7 @@ struct ceph_sparse_extent { */ struct ceph_osd { refcount_t o_ref; + int o_sparse_op_idx; struct ceph_osd_client *o_osdc; int o_osd; int o_incarnation; @@ -63,6 +94,7 @@ struct ceph_osd { unsigned long lru_ttl; struct list_head o_keepalive_item; struct mutex lock; + struct ceph_sparse_read o_sparse_read; }; #define CEPH_OSD_SLAB_OPS 2 diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3e03ae68722c..0aacbadcab06 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -376,6 +376,7 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, switch (op->op) { case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: kfree(op->extent.sparse_ext); @@ -670,6 +671,7 @@ static void get_num_data_items(struct ceph_osd_request *req, /* reply */ case CEPH_OSD_OP_STAT: case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_LIST_WATCHERS: *num_reply_data_items += 1; break; @@ -739,7 +741,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO && - opcode != CEPH_OSD_OP_TRUNCATE); + opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ); op->extent.offset = offset; op->extent.length = length; @@ -964,6 +966,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, case CEPH_OSD_OP_STAT: break; case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: case CEPH_OSD_OP_ZERO: @@ -1060,7 +1063,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE && - opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE); + opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE && + opcode != 
CEPH_OSD_OP_SPARSE_READ); req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, GFP_NOFS); @@ -1201,6 +1205,13 @@ static void osd_init(struct ceph_osd *osd) mutex_init(&osd->lock); } +static void ceph_init_sparse_read(struct ceph_sparse_read *sr) +{ + kfree(sr->sr_extent); + memset(sr, '\0', sizeof(*sr)); + sr->sr_state = CEPH_SPARSE_READ_HDR; +} + static void osd_cleanup(struct ceph_osd *osd) { WARN_ON(!RB_EMPTY_NODE(&osd->o_node)); @@ -1211,6 +1222,8 @@ static void osd_cleanup(struct ceph_osd *osd) WARN_ON(!list_empty(&osd->o_osd_lru)); WARN_ON(!list_empty(&osd->o_keepalive_item)); + ceph_init_sparse_read(&osd->o_sparse_read); + if (osd->o_auth.authorizer) { WARN_ON(osd_homeless(osd)); ceph_auth_destroy_authorizer(osd->o_auth.authorizer); @@ -1230,6 +1243,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) osd_init(osd); osd->o_osdc = osdc; osd->o_osd = onum; + osd->o_sparse_op_idx = -1; + + ceph_init_sparse_read(&osd->o_sparse_read); ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); @@ -2034,6 +2050,7 @@ static void setup_request_data(struct ceph_osd_request *req) &op->raw_data_in); break; case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_SPARSE_READ: ceph_osdc_msg_data_add(reply_msg, &op->extent.osd_data); break; @@ -2453,8 +2470,10 @@ static void finish_request(struct ceph_osd_request *req) req->r_end_latency = ktime_get(); - if (req->r_osd) + if (req->r_osd) { + ceph_init_sparse_read(&req->r_osd->o_sparse_read); unlink_request(req->r_osd, req); + } atomic_dec(&osdc->num_requests); /* @@ -5366,6 +5385,24 @@ static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg) ceph_msg_put(msg); } +/* How much sparse data was requested? 
*/ +static u64 sparse_data_requested(struct ceph_osd_request *req) +{ + u64 len = 0; + + if (req->r_flags & CEPH_OSD_FLAG_READ) { + int i; + + for (i = 0; i < req->r_num_ops; ++i) { + struct ceph_osd_req_op *op = &req->r_ops[i]; + + if (op->op == CEPH_OSD_OP_SPARSE_READ) + len += op->extent.length; + } + } + return len; +} + /* * Lookup and return message for incoming reply. Don't try to do * anything about a larger than preallocated data portion of the @@ -5382,6 +5419,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, int front_len = le32_to_cpu(hdr->front_len); int data_len = le32_to_cpu(hdr->data_len); u64 tid = le64_to_cpu(hdr->tid); + u64 srlen; down_read(&osdc->lock); if (!osd_registered(osd)) { @@ -5414,7 +5452,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, req->r_reply = m; } - if (data_len > req->r_reply->data_length) { + srlen = sparse_data_requested(req); + if (!srlen && data_len > req->r_reply->data_length) { pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n", __func__, osd->o_osd, req->r_tid, data_len, req->r_reply->data_length); @@ -5424,6 +5463,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, } m = ceph_msg_get(req->r_reply); + m->sparse_read = (bool)srlen; + dout("get_reply tid %lld %p\n", tid, m); out_unlock_session: @@ -5656,9 +5697,217 @@ static int osd_check_message_signature(struct ceph_msg *msg) return ceph_auth_check_message_signature(auth, msg); } +static void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len, + bool zero) +{ + while (len) { + struct page *page; + size_t poff, plen; + + page = ceph_msg_data_next(cursor, &poff, &plen); + if (plen > len) + plen = len; + if (zero) + zero_user_segment(page, poff, poff + plen); + len -= plen; + ceph_msg_data_advance(cursor, plen); + } +} + +static int prep_next_sparse_read(struct ceph_connection *con, + struct ceph_msg_data_cursor *cursor) +{ + struct ceph_osd *o = con->private; + struct ceph_sparse_read *sr = 
&o->o_sparse_read; + struct ceph_osd_request *req; + struct ceph_osd_req_op *op; + + spin_lock(&o->o_requests_lock); + req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid)); + if (!req) { + spin_unlock(&o->o_requests_lock); + return -EBADR; + } + + if (o->o_sparse_op_idx < 0) { + u64 srlen = sparse_data_requested(req); + + dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n", + __func__, o->o_osd, srlen); + ceph_msg_data_cursor_init(cursor, con->in_msg, srlen); + } else { + u64 end; + + op = &req->r_ops[o->o_sparse_op_idx]; + + WARN_ON_ONCE(op->extent.sparse_ext); + + /* hand back buffer we took earlier */ + op->extent.sparse_ext = sr->sr_extent; + sr->sr_extent = NULL; + op->extent.sparse_ext_cnt = sr->sr_count; + sr->sr_ext_len = 0; + dout("%s: [%d] completed extent array len %d cursor->resid %zd\n", + __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid); + /* Advance to end of data for this operation */ + end = ceph_sparse_ext_map_end(op); + if (end < sr->sr_req_len) + advance_cursor(cursor, sr->sr_req_len - end, false); + } + + ceph_init_sparse_read(sr); + + /* find next op in this request (if any) */ + while (++o->o_sparse_op_idx < req->r_num_ops) { + op = &req->r_ops[o->o_sparse_op_idx]; + if (op->op == CEPH_OSD_OP_SPARSE_READ) + goto found; + } + + /* reset for next sparse read request */ + spin_unlock(&o->o_requests_lock); + o->o_sparse_op_idx = -1; + return 0; +found: + sr->sr_req_off = op->extent.offset; + sr->sr_req_len = op->extent.length; + sr->sr_pos = sr->sr_req_off; + dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__, + o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len); + + /* hand off request's sparse extent map buffer */ + sr->sr_ext_len = op->extent.sparse_ext_cnt; + op->extent.sparse_ext_cnt = 0; + sr->sr_extent = op->extent.sparse_ext; + op->extent.sparse_ext = NULL; + + spin_unlock(&o->o_requests_lock); + return 1; +} + +#ifdef __BIG_ENDIAN +static inline void 
convert_extent_map(struct ceph_sparse_read *sr) +{ + int i; + + for (i = 0; i < sr->sr_count; i++) { + struct ceph_sparse_extent *ext = &sr->sr_extent[i]; + + ext->off = le64_to_cpu((__force __le64)ext->off); + ext->len = le64_to_cpu((__force __le64)ext->len); + } +} +#else +static inline void convert_extent_map(struct ceph_sparse_read *sr) +{ +} +#endif + +#define MAX_EXTENTS 4096 + +static int osd_sparse_read(struct ceph_connection *con, + struct ceph_msg_data_cursor *cursor, + char **pbuf) +{ + struct ceph_osd *o = con->private; + struct ceph_sparse_read *sr = &o->o_sparse_read; + u32 count = sr->sr_count; + u64 eoff, elen; + int ret; + + switch (sr->sr_state) { + case CEPH_SPARSE_READ_HDR: +next_op: + ret = prep_next_sparse_read(con, cursor); + if (ret <= 0) + return ret; + + /* number of extents */ + ret = sizeof(sr->sr_count); + *pbuf = (char *)&sr->sr_count; + sr->sr_state = CEPH_SPARSE_READ_EXTENTS; + break; + case CEPH_SPARSE_READ_EXTENTS: + /* Convert sr_count to host-endian */ + count = le32_to_cpu((__force __le32)sr->sr_count); + sr->sr_count = count; + dout("[%d] got %u extents\n", o->o_osd, count); + + if (count > 0) { + if (!sr->sr_extent || count > sr->sr_ext_len) { + /* + * Apply a hard cap to the number of extents. + * If we have more, assume something is wrong. + */ + if (count > MAX_EXTENTS) { + dout("%s: OSD returned 0x%x extents in a single reply!\n", + __func__, count); + return -EREMOTEIO; + } + + /* no extent array provided, or too short */ + kfree(sr->sr_extent); + sr->sr_extent = kmalloc_array(count, + sizeof(*sr->sr_extent), + GFP_NOIO); + if (!sr->sr_extent) + return -ENOMEM; + sr->sr_ext_len = count; + } + ret = count * sizeof(*sr->sr_extent); + *pbuf = (char *)sr->sr_extent; + sr->sr_state = CEPH_SPARSE_READ_DATA_LEN; + break; + } + /* No extents? 
Read data len */ + fallthrough; + case CEPH_SPARSE_READ_DATA_LEN: + convert_extent_map(sr); + ret = sizeof(sr->sr_datalen); + *pbuf = (char *)&sr->sr_datalen; + sr->sr_state = CEPH_SPARSE_READ_DATA; + break; + case CEPH_SPARSE_READ_DATA: + if (sr->sr_index >= count) { + sr->sr_state = CEPH_SPARSE_READ_HDR; + goto next_op; + } + + eoff = sr->sr_extent[sr->sr_index].off; + elen = sr->sr_extent[sr->sr_index].len; + + dout("[%d] ext %d off 0x%llx len 0x%llx\n", + o->o_osd, sr->sr_index, eoff, elen); + + if (elen > INT_MAX) { + dout("Sparse read extent length too long (0x%llx)\n", + elen); + return -EREMOTEIO; + } + + /* zero out anything from sr_pos to start of extent */ + if (sr->sr_pos < eoff) + advance_cursor(cursor, eoff - sr->sr_pos, true); + + /* Set position to end of extent */ + sr->sr_pos = eoff + elen; + + /* send back the new length and nullify the ptr */ + cursor->sr_resid = elen; + ret = elen; + *pbuf = NULL; + + /* Bump the array index */ + ++sr->sr_index; + break; + } + return ret; +} + static const struct ceph_connection_operations osd_con_ops = { .get = osd_get_con, .put = osd_put_con, + .sparse_read = osd_sparse_read, .alloc_msg = osd_alloc_msg, .dispatch = osd_dispatch, .fault = osd_fault, -- cgit v1.2.3 From dee0c5f834605ce9b384ee8b9c7032ffd8db4eca Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 1 Jul 2022 06:30:12 -0400 Subject: libceph: add new iov_iter-based ceph_msg_data_type and ceph_osd_data_type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an iov_iter to the unions in ceph_msg_data and ceph_msg_data_cursor. Instead of requiring a list of pages or bvecs, we can just use an iov_iter directly, and avoid extra allocations. We assume that the pages represented by the iter are pinned such that they shouldn't incur page faults, which is the case for the iov_iters created by netfs. 
While working on this, Al Viro informed me that he was going to change iov_iter_get_pages to auto-advance the iterator as that pattern is more or less required for ITER_PIPE anyway. We emulate that here for now by advancing in the _next op and tracking that amount in the "lastlen" field. In the event that _next is called twice without an intervening _advance, we revert the iov_iter by the remaining lastlen before calling iov_iter_get_pages. Cc: Al Viro Cc: David Howells Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 8 +++++ include/linux/ceph/osd_client.h | 4 +++ net/ceph/messenger.c | 77 +++++++++++++++++++++++++++++++++++++++++ net/ceph/osd_client.c | 27 +++++++++++++++ 4 files changed, 116 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 9fd7255172ad..2eaaabbe98cb 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -123,6 +123,7 @@ enum ceph_msg_data_type { CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */ #endif /* CONFIG_BLOCK */ CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */ + CEPH_MSG_DATA_ITER, /* data source/destination is an iov_iter */ }; #ifdef CONFIG_BLOCK @@ -224,6 +225,7 @@ struct ceph_msg_data { bool own_pages; }; struct ceph_pagelist *pagelist; + struct iov_iter iter; }; }; @@ -248,6 +250,10 @@ struct ceph_msg_data_cursor { struct page *page; /* page from list */ size_t offset; /* bytes from list */ }; + struct { + struct iov_iter iov_iter; + unsigned int lastlen; + }; }; }; @@ -605,6 +611,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, #endif /* CONFIG_BLOCK */ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, struct ceph_bvec_iter *bvec_pos); +void ceph_msg_data_add_iter(struct ceph_msg *msg, + struct iov_iter *iter); struct ceph_msg 
*ceph_msg_new2(int type, int front_len, int max_data_items, gfp_t flags, bool can_fail); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index bfa4813590da..8f5d2b5bbba2 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -108,6 +108,7 @@ enum ceph_osd_data_type { CEPH_OSD_DATA_TYPE_BIO, #endif /* CONFIG_BLOCK */ CEPH_OSD_DATA_TYPE_BVECS, + CEPH_OSD_DATA_TYPE_ITER, }; struct ceph_osd_data { @@ -131,6 +132,7 @@ struct ceph_osd_data { struct ceph_bvec_iter bvec_pos; u32 num_bvecs; }; + struct iov_iter iter; }; }; @@ -501,6 +503,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, unsigned int which, struct ceph_bvec_iter *bvec_pos); +void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, + unsigned int which, struct iov_iter *iter); extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, unsigned int which, diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 2eb10d7518e8..10a41cd9c523 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -969,6 +969,62 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, return true; } +static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) +{ + struct ceph_msg_data *data = cursor->data; + + cursor->iov_iter = data->iter; + cursor->lastlen = 0; + iov_iter_truncate(&cursor->iov_iter, length); + cursor->resid = iov_iter_count(&cursor->iov_iter); +} + +static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length) +{ + struct page *page; + ssize_t len; + + if (cursor->lastlen) + iov_iter_revert(&cursor->iov_iter, cursor->lastlen); + + len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE, + 1, page_offset); + BUG_ON(len < 0); + + cursor->lastlen = len; + + /* + * FIXME: The assumption is 
that the pages represented by the iov_iter + * are pinned, with the references held by the upper-level + * callers, or by virtue of being under writeback. Eventually, + * we'll get an iov_iter_get_pages2 variant that doesn't take + * page refs. Until then, just put the page ref. + */ + VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page); + put_page(page); + + *length = min_t(size_t, len, cursor->resid); + return page; +} + +static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + BUG_ON(bytes > cursor->resid); + cursor->resid -= bytes; + + if (bytes < cursor->lastlen) { + cursor->lastlen -= bytes; + } else { + iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen); + cursor->lastlen = 0; + } + + return cursor->resid; +} + /* * Message data is handled (sent or received) in pieces, where each * piece resides on a single page. The network layer might not @@ -996,6 +1052,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) case CEPH_MSG_DATA_BVECS: ceph_msg_data_bvecs_cursor_init(cursor, length); break; + case CEPH_MSG_DATA_ITER: + ceph_msg_data_iter_cursor_init(cursor, length); + break; case CEPH_MSG_DATA_NONE: default: /* BUG(); */ @@ -1043,6 +1102,9 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, case CEPH_MSG_DATA_BVECS: page = ceph_msg_data_bvecs_next(cursor, page_offset, length); break; + case CEPH_MSG_DATA_ITER: + page = ceph_msg_data_iter_next(cursor, page_offset, length); + break; case CEPH_MSG_DATA_NONE: default: page = NULL; @@ -1081,6 +1143,9 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) case CEPH_MSG_DATA_BVECS: new_piece = ceph_msg_data_bvecs_advance(cursor, bytes); break; + case CEPH_MSG_DATA_ITER: + new_piece = ceph_msg_data_iter_advance(cursor, bytes); + break; case CEPH_MSG_DATA_NONE: default: BUG(); @@ -1880,6 +1945,18 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, } 
EXPORT_SYMBOL(ceph_msg_data_add_bvecs); +void ceph_msg_data_add_iter(struct ceph_msg *msg, + struct iov_iter *iter) +{ + struct ceph_msg_data *data; + + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_ITER; + data->iter = *iter; + + msg->data_length += iov_iter_count(&data->iter); +} + /* * construct a new message with given type, size * the new msg has a ref count of 1. diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0aacbadcab06..684faf8553de 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -171,6 +171,13 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data, osd_data->num_bvecs = num_bvecs; } +static void ceph_osd_iter_init(struct ceph_osd_data *osd_data, + struct iov_iter *iter) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_ITER; + osd_data->iter = *iter; +} + static struct ceph_osd_data * osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which) { @@ -264,6 +271,22 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos); +/** + * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer + * @osd_req: The request to set up + * @which: Index of the operation in which to set the iter + * @iter: The buffer iterator + */ +void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, + unsigned int which, struct iov_iter *iter) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_iter_init(osd_data, iter); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_iter); + static void osd_req_op_cls_request_info_pagelist( struct ceph_osd_request *osd_req, unsigned int which, struct ceph_pagelist *pagelist) @@ -346,6 +369,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data) #endif /* CONFIG_BLOCK */ case CEPH_OSD_DATA_TYPE_BVECS: return osd_data->bvec_pos.iter.bi_size; + case CEPH_OSD_DATA_TYPE_ITER: + return iov_iter_count(&osd_data->iter); 
default: WARN(true, "unrecognized data type %d\n", (int)osd_data->type); return 0; @@ -954,6 +979,8 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg, #endif } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) { ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos); + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) { + ceph_msg_data_add_iter(msg, &osd_data->iter); } else { BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); } -- cgit v1.2.3 From 2d332d5bc424404911540006a8bb450fbb96b178 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 27 Jul 2020 10:16:09 -0400 Subject: ceph: fscrypt_auth handling for ceph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most fscrypt-enabled filesystems store the crypto context in an xattr, but that's problematic for ceph as xattrs are governed by the XATTR cap, whereas we really want the crypto context as part of the AUTH cap. Because of this, the MDS has added two new inode metadata fields: fscrypt_auth and fscrypt_file. The former is used to hold the crypto context, and the latter is used to track the real file size. Parse new fscrypt_auth and fscrypt_file fields in inode traces. For now, we don't use fscrypt_file, but fscrypt_auth is used to hold the fscrypt context. Allow the client to use a setattr request for setting the fscrypt_auth field. Since this is not a standard setattr request from the VFS, we add a new field to __ceph_setattr that carries ceph-specific inode attrs. Have the set_context op do a setattr that sets the fscrypt_auth value, and get_context just return the contents of that field (since it should always be available). 
Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/Makefile | 1 + fs/ceph/acl.c | 4 +- fs/ceph/caps.c | 78 ++++++++++++++++++++++++----- fs/ceph/crypto.c | 77 +++++++++++++++++++++++++++++ fs/ceph/crypto.h | 36 ++++++++++++++ fs/ceph/inode.c | 64 +++++++++++++++++++++++- fs/ceph/mds_client.c | 114 +++++++++++++++++++++++++++++++++++++++---- fs/ceph/mds_client.h | 7 +++ fs/ceph/super.c | 3 ++ fs/ceph/super.h | 15 +++++- include/linux/ceph/ceph_fs.h | 21 +++++--- 11 files changed, 385 insertions(+), 35 deletions(-) create mode 100644 fs/ceph/crypto.c create mode 100644 fs/ceph/crypto.h (limited to 'include/linux') diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 50c635dc7f71..1f77ca04c426 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -12,3 +12,4 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ ceph-$(CONFIG_CEPH_FSCACHE) += cache.o ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o +ceph-$(CONFIG_FS_ENCRYPTION) += crypto.o diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 6945a938d396..8a56f979c7cb 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -140,7 +140,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, newattrs.ia_ctime = current_time(inode); newattrs.ia_mode = new_mode; newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - ret = __ceph_setattr(inode, &newattrs); + ret = __ceph_setattr(inode, &newattrs, NULL); if (ret) goto out_free; } @@ -151,7 +151,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, newattrs.ia_ctime = old_ctime; newattrs.ia_mode = old_mode; newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - __ceph_setattr(inode, &newattrs); + __ceph_setattr(inode, &newattrs, NULL); } goto out_free; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e2bb0d0072da..1c62ef339bc6 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -14,6 +14,7 @@ #include "super.h" #include "mds_client.h" #include "cache.h" 
+#include "crypto.h" #include #include @@ -1216,15 +1217,12 @@ struct cap_msg_args { umode_t mode; bool inline_data; bool wake; + u32 fscrypt_auth_len; + u32 fscrypt_file_len; + u8 fscrypt_auth[sizeof(struct ceph_fscrypt_auth)]; // for context + u8 fscrypt_file[sizeof(u64)]; // for size }; -/* - * cap struct size + flock buffer size + inline version + inline data size + - * osd_epoch_barrier + oldest_flush_tid - */ -#define CAP_MSG_SIZE (sizeof(struct ceph_mds_caps) + \ - 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4) - /* Marshal up the cap msg to the MDS */ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) { @@ -1240,7 +1238,7 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) arg->size, arg->max_size, arg->xattr_version, arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); - msg->hdr.version = cpu_to_le16(10); + msg->hdr.version = cpu_to_le16(12); msg->hdr.tid = cpu_to_le64(arg->flush_tid); fc = msg->front.iov_base; @@ -1311,6 +1309,21 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) /* Advisory flags (version 10) */ ceph_encode_32(&p, arg->flags); + + /* dirstats (version 11) - these are r/o on the client */ + ceph_encode_64(&p, 0); + ceph_encode_64(&p, 0); + +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + /* fscrypt_auth and fscrypt_file (version 12) */ + ceph_encode_32(&p, arg->fscrypt_auth_len); + ceph_encode_copy(&p, arg->fscrypt_auth, arg->fscrypt_auth_len); + ceph_encode_32(&p, arg->fscrypt_file_len); + ceph_encode_copy(&p, arg->fscrypt_file, arg->fscrypt_file_len); +#else /* CONFIG_FS_ENCRYPTION */ + ceph_encode_32(&p, 0); + ceph_encode_32(&p, 0); +#endif /* CONFIG_FS_ENCRYPTION */ } /* @@ -1432,7 +1445,37 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, } } arg->flags = flags; +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + if (ci->fscrypt_auth_len && + WARN_ON_ONCE(ci->fscrypt_auth_len > sizeof(struct ceph_fscrypt_auth))) { + /* Don't set this if it's too big */ + 
arg->fscrypt_auth_len = 0; + } else { + arg->fscrypt_auth_len = ci->fscrypt_auth_len; + memcpy(arg->fscrypt_auth, ci->fscrypt_auth, + min_t(size_t, ci->fscrypt_auth_len, + sizeof(arg->fscrypt_auth))); + } + /* FIXME: use this to track "real" size */ + arg->fscrypt_file_len = 0; +#endif /* CONFIG_FS_ENCRYPTION */ +} + +#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \ + 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4) + +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) +static inline int cap_msg_size(struct cap_msg_args *arg) +{ + return CAP_MSG_FIXED_FIELDS + arg->fscrypt_auth_len + + arg->fscrypt_file_len; } +#else +static inline int cap_msg_size(struct cap_msg_args *arg) +{ + return CAP_MSG_FIXED_FIELDS; +} +#endif /* CONFIG_FS_ENCRYPTION */ /* * Send a cap msg on the given inode. @@ -1444,7 +1487,8 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) struct ceph_msg *msg; struct inode *inode = &ci->netfs.inode; - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false); + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS, + false); if (!msg) { pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n", ceph_vinop(inode), ceph_cap_string(arg->dirty), @@ -1470,10 +1514,6 @@ static inline int __send_flush_snap(struct inode *inode, struct cap_msg_args arg; struct ceph_msg *msg; - msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false); - if (!msg) - return -ENOMEM; - arg.session = session; arg.ino = ceph_vino(inode).ino; arg.cid = 0; @@ -1511,6 +1551,18 @@ static inline int __send_flush_snap(struct inode *inode, arg.flags = 0; arg.wake = false; + /* + * No fscrypt_auth changes from a capsnap. It will need + * to update fscrypt_file on size changes (TODO). 
+ */ + arg.fscrypt_auth_len = 0; + arg.fscrypt_file_len = 0; + + msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(&arg), + GFP_NOFS, false); + if (!msg) + return -ENOMEM; + encode_cap_msg(msg, &arg); ceph_con_send(&arg.session->s_con, msg); return 0; diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c new file mode 100644 index 000000000000..b17a6ee16cf0 --- /dev/null +++ b/fs/ceph/crypto.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include "super.h" +#include "crypto.h" + +static int ceph_crypt_get_context(struct inode *inode, void *ctx, size_t len) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_fscrypt_auth *cfa = (struct ceph_fscrypt_auth *)ci->fscrypt_auth; + u32 ctxlen; + + /* Non existent or too short? */ + if (!cfa || (ci->fscrypt_auth_len < (offsetof(struct ceph_fscrypt_auth, cfa_blob) + 1))) + return -ENOBUFS; + + /* Some format we don't recognize? */ + if (le32_to_cpu(cfa->cfa_version) != CEPH_FSCRYPT_AUTH_VERSION) + return -ENOBUFS; + + ctxlen = le32_to_cpu(cfa->cfa_blob_len); + if (len < ctxlen) + return -ERANGE; + + memcpy(ctx, cfa->cfa_blob, ctxlen); + return ctxlen; +} + +static int ceph_crypt_set_context(struct inode *inode, const void *ctx, + size_t len, void *fs_data) +{ + int ret; + struct iattr attr = { }; + struct ceph_iattr cia = { }; + struct ceph_fscrypt_auth *cfa; + + WARN_ON_ONCE(fs_data); + + if (len > FSCRYPT_SET_CONTEXT_MAX_SIZE) + return -EINVAL; + + cfa = kzalloc(sizeof(*cfa), GFP_KERNEL); + if (!cfa) + return -ENOMEM; + + cfa->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION); + cfa->cfa_blob_len = cpu_to_le32(len); + memcpy(cfa->cfa_blob, ctx, len); + + cia.fscrypt_auth = cfa; + + ret = __ceph_setattr(inode, &attr, &cia); + if (ret == 0) + inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED); + kfree(cia.fscrypt_auth); + return ret; +} + +static bool ceph_crypt_empty_dir(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + + return 
ci->i_rsubdirs + ci->i_rfiles == 1; +} + +static struct fscrypt_operations ceph_fscrypt_ops = { + .get_context = ceph_crypt_get_context, + .set_context = ceph_crypt_set_context, + .empty_dir = ceph_crypt_empty_dir, +}; + +void ceph_fscrypt_set_ops(struct super_block *sb) +{ + fscrypt_set_ops(sb, &ceph_fscrypt_ops); +} diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h new file mode 100644 index 000000000000..6dca674f79b8 --- /dev/null +++ b/fs/ceph/crypto.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Ceph fscrypt functionality + */ + +#ifndef _CEPH_CRYPTO_H +#define _CEPH_CRYPTO_H + +#include + +struct ceph_fscrypt_auth { + __le32 cfa_version; + __le32 cfa_blob_len; + u8 cfa_blob[FSCRYPT_SET_CONTEXT_MAX_SIZE]; +} __packed; + +#define CEPH_FSCRYPT_AUTH_VERSION 1 +static inline u32 ceph_fscrypt_auth_len(struct ceph_fscrypt_auth *fa) +{ + u32 ctxsize = le32_to_cpu(fa->cfa_blob_len); + + return offsetof(struct ceph_fscrypt_auth, cfa_blob) + ctxsize; +} + +#ifdef CONFIG_FS_ENCRYPTION +void ceph_fscrypt_set_ops(struct super_block *sb); + +#else /* CONFIG_FS_ENCRYPTION */ + +static inline void ceph_fscrypt_set_ops(struct super_block *sb) +{ +} + +#endif /* CONFIG_FS_ENCRYPTION */ + +#endif diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e23172024707..a3aa7870a6a2 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -14,10 +14,12 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" #include "cache.h" +#include "crypto.h" #include /* @@ -617,6 +619,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_WORK(&ci->i_work, ceph_inode_work); ci->i_work_mask = 0; memset(&ci->i_btime, '\0', sizeof(ci->i_btime)); +#ifdef CONFIG_FS_ENCRYPTION + ci->fscrypt_auth = NULL; + ci->fscrypt_auth_len = 0; +#endif return &ci->netfs.inode; } @@ -625,6 +631,9 @@ void ceph_free_inode(struct inode *inode) struct ceph_inode_info *ci = ceph_inode(inode); kfree(ci->i_symlink); +#ifdef CONFIG_FS_ENCRYPTION + kfree(ci->fscrypt_auth); 
+#endif kmem_cache_free(ceph_inode_cachep, ci); } @@ -645,6 +654,7 @@ void ceph_evict_inode(struct inode *inode) clear_inode(inode); ceph_fscache_unregister_inode_cookie(ci); + fscrypt_put_encryption_info(inode); __ceph_remove_caps(ci); @@ -935,6 +945,17 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files); +#ifdef CONFIG_FS_ENCRYPTION + if (iinfo->fscrypt_auth_len && (inode->i_state & I_NEW)) { + kfree(ci->fscrypt_auth); + ci->fscrypt_auth_len = iinfo->fscrypt_auth_len; + ci->fscrypt_auth = iinfo->fscrypt_auth; + iinfo->fscrypt_auth = NULL; + iinfo->fscrypt_auth_len = 0; + inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED); + } +#endif + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = mode; @@ -2079,7 +2100,8 @@ static const struct inode_operations ceph_symlink_iops = { .listxattr = ceph_listxattr, }; -int __ceph_setattr(struct inode *inode, struct iattr *attr) +int __ceph_setattr(struct inode *inode, struct iattr *attr, + struct ceph_iattr *cia) { struct ceph_inode_info *ci = ceph_inode(inode); unsigned int ia_valid = attr->ia_valid; @@ -2119,6 +2141,43 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) } dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + if (cia && cia->fscrypt_auth) { + u32 len = ceph_fscrypt_auth_len(cia->fscrypt_auth); + + if (len > sizeof(*cia->fscrypt_auth)) { + err = -EINVAL; + spin_unlock(&ci->i_ceph_lock); + goto out; + } + + dout("setattr %llx:%llx fscrypt_auth len %u to %u)\n", + ceph_vinop(inode), ci->fscrypt_auth_len, len); + + /* It should never be re-set once set */ + WARN_ON_ONCE(ci->fscrypt_auth); + + if (issued & CEPH_CAP_AUTH_EXCL) { + dirtied |= CEPH_CAP_AUTH_EXCL; + kfree(ci->fscrypt_auth); + ci->fscrypt_auth = (u8 *)cia->fscrypt_auth; + ci->fscrypt_auth_len = len; + } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || + 
ci->fscrypt_auth_len != len || + memcmp(ci->fscrypt_auth, cia->fscrypt_auth, len)) { + req->r_fscrypt_auth = cia->fscrypt_auth; + mask |= CEPH_SETATTR_FSCRYPT_AUTH; + release |= CEPH_CAP_AUTH_SHARED; + } + cia->fscrypt_auth = NULL; + } +#else + if (cia && cia->fscrypt_auth) { + err = -EINVAL; + spin_unlock(&ci->i_ceph_lock); + goto out; + } +#endif /* CONFIG_FS_ENCRYPTION */ if (ia_valid & ATTR_UID) { dout("setattr %p uid %d -> %d\n", inode, @@ -2282,6 +2341,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) req->r_stamp = attr->ia_ctime; err = ceph_mdsc_do_request(mdsc, NULL, req); } +out: dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, ceph_cap_string(dirtied), mask); @@ -2322,7 +2382,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) return -EDQUOT; - err = __ceph_setattr(inode, attr); + err = __ceph_setattr(inode, attr, NULL); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 77b5e7b2e5dd..c3927dab5a3b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -15,6 +15,7 @@ #include "super.h" #include "mds_client.h" +#include "crypto.h" #include #include @@ -184,8 +185,54 @@ static int parse_reply_info_in(void **p, void *end, info->rsnaps = 0; } + if (struct_v >= 5) { + u32 alen; + + ceph_decode_32_safe(p, end, alen, bad); + + while (alen--) { + u32 len; + + /* key */ + ceph_decode_32_safe(p, end, len, bad); + ceph_decode_skip_n(p, end, len, bad); + /* value */ + ceph_decode_32_safe(p, end, len, bad); + ceph_decode_skip_n(p, end, len, bad); + } + } + + /* fscrypt flag -- ignore */ + if (struct_v >= 6) + ceph_decode_skip_8(p, end, bad); + + info->fscrypt_auth = NULL; + info->fscrypt_auth_len = 0; + info->fscrypt_file = NULL; + info->fscrypt_file_len = 0; + if (struct_v >= 7) { + ceph_decode_32_safe(p, end, info->fscrypt_auth_len, 
bad); + if (info->fscrypt_auth_len) { + info->fscrypt_auth = kmalloc(info->fscrypt_auth_len, + GFP_KERNEL); + if (!info->fscrypt_auth) + return -ENOMEM; + ceph_decode_copy_safe(p, end, info->fscrypt_auth, + info->fscrypt_auth_len, bad); + } + ceph_decode_32_safe(p, end, info->fscrypt_file_len, bad); + if (info->fscrypt_file_len) { + info->fscrypt_file = kmalloc(info->fscrypt_file_len, + GFP_KERNEL); + if (!info->fscrypt_file) + return -ENOMEM; + ceph_decode_copy_safe(p, end, info->fscrypt_file, + info->fscrypt_file_len, bad); + } + } *p = end; } else { + /* legacy (unversioned) struct */ if (features & CEPH_FEATURE_MDS_INLINE_DATA) { ceph_decode_64_safe(p, end, info->inline_version, bad); ceph_decode_32_safe(p, end, info->inline_len, bad); @@ -651,8 +698,21 @@ out_bad: static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) { + int i; + + kfree(info->diri.fscrypt_auth); + kfree(info->diri.fscrypt_file); + kfree(info->targeti.fscrypt_auth); + kfree(info->targeti.fscrypt_file); if (!info->dir_entries) return; + + for (i = 0; i < info->dir_nr; i++) { + struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i; + + kfree(rde->inode.fscrypt_auth); + kfree(rde->inode.fscrypt_file); + } free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size)); } @@ -966,6 +1026,7 @@ void ceph_mdsc_release_request(struct kref *kref) put_cred(req->r_cred); if (req->r_pagelist) ceph_pagelist_release(req->r_pagelist); + kfree(req->r_fscrypt_auth); put_request_session(req); ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); WARN_ON_ONCE(!list_empty(&req->r_wait)); @@ -2543,8 +2604,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, return r; } -static void encode_timestamp_and_gids(void **p, - const struct ceph_mds_request *req) +static void encode_mclientrequest_tail(void **p, + const struct ceph_mds_request *req) { struct ceph_timespec ts; int i; @@ -2557,6 +2618,20 @@ static void encode_timestamp_and_gids(void 
**p, for (i = 0; i < req->r_cred->group_info->ngroups; i++) ceph_encode_64(p, from_kgid(&init_user_ns, req->r_cred->group_info->gid[i])); + + /* v5: altname (TODO: skip for now) */ + ceph_encode_32(p, 0); + + /* v6: fscrypt_auth and fscrypt_file */ + if (req->r_fscrypt_auth) { + u32 authlen = ceph_fscrypt_auth_len(req->r_fscrypt_auth); + + ceph_encode_32(p, authlen); + ceph_encode_copy(p, req->r_fscrypt_auth, authlen); + } else { + ceph_encode_32(p, 0); + } + ceph_encode_32(p, 0); // fscrypt_file for now } /* @@ -2605,12 +2680,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, goto out_free1; } + /* head */ len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head); - len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + - sizeof(struct ceph_timespec); - len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups); - /* calculate (max) length for cap releases */ + /* filepaths */ + len += 2 * (1 + sizeof(u32) + sizeof(u64)); + len += pathlen1 + pathlen2; + + /* cap releases */ len += sizeof(struct ceph_mds_request_release) * (!!req->r_inode_drop + !!req->r_dentry_drop + !!req->r_old_inode_drop + !!req->r_old_dentry_drop); @@ -2620,6 +2697,25 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (req->r_old_dentry_drop) len += pathlen2; + /* MClientRequest tail */ + + /* req->r_stamp */ + len += sizeof(struct ceph_timespec); + + /* gid list */ + len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups); + + /* alternate name */ + len += sizeof(u32); // TODO + + /* fscrypt_auth */ + len += sizeof(u32); // fscrypt_auth + if (req->r_fscrypt_auth) + len += ceph_fscrypt_auth_len(req->r_fscrypt_auth); + + /* fscrypt_file */ + len += sizeof(u32); + msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); if (!msg) { msg = ERR_PTR(-ENOMEM); @@ -2639,7 +2735,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, } else { 
struct ceph_mds_request_head *new_head = msg->front.iov_base; - msg->hdr.version = cpu_to_le16(4); + msg->hdr.version = cpu_to_le16(6); new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid; p = msg->front.iov_base + sizeof(*new_head); @@ -2690,7 +2786,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, head->num_releases = cpu_to_le16(releases); - encode_timestamp_and_gids(&p, req); + encode_mclientrequest_tail(&p, req); if (WARN_ON_ONCE(p > end)) { ceph_msg_put(msg); @@ -2820,7 +2916,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, rhead->num_releases = 0; p = msg->front.iov_base + req->r_request_release_offset; - encode_timestamp_and_gids(&p, req); + encode_mclientrequest_tail(&p, req); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 3c1aa99c1876..a2e85fb5aab1 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -86,6 +86,10 @@ struct ceph_mds_reply_info_in { s32 dir_pin; struct ceph_timespec btime; struct ceph_timespec snap_btime; + u8 *fscrypt_auth; + u8 *fscrypt_file; + u32 fscrypt_auth_len; + u32 fscrypt_file_len; u64 rsnaps; u64 change_attr; }; @@ -278,6 +282,9 @@ struct ceph_mds_request { struct mutex r_fill_mutex; union ceph_mds_request_args r_args; + + struct ceph_fscrypt_auth *r_fscrypt_auth; + int r_fmode; /* file mode, if expecting cap */ int r_request_release_offset; const struct cred *r_cred; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index d6a1790f6923..c4ab2db85ef0 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -20,6 +20,7 @@ #include "super.h" #include "mds_client.h" #include "cache.h" +#include "crypto.h" #include #include @@ -1135,6 +1136,8 @@ static int ceph_set_super(struct super_block *s, struct fs_context *fc) s->s_time_max = U32_MAX; s->s_flags |= SB_NODIRATIME | 
SB_NOATIME; + ceph_fscrypt_set_ops(s); + ret = set_anon_super_fc(s, fc); if (ret != 0) fsc->sb = NULL; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index eb621bb276bd..3a39a9b3bc33 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -450,6 +450,13 @@ struct ceph_inode_info { struct work_struct i_work; unsigned long i_work_mask; + +#ifdef CONFIG_FS_ENCRYPTION + u32 fscrypt_auth_len; + u32 fscrypt_file_len; + u8 *fscrypt_auth; + u8 *fscrypt_file; +#endif }; struct ceph_netfs_request_data { @@ -1073,7 +1080,13 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) } extern int ceph_permission(struct mnt_idmap *idmap, struct inode *inode, int mask); -extern int __ceph_setattr(struct inode *inode, struct iattr *attr); + +struct ceph_iattr { + struct ceph_fscrypt_auth *fscrypt_auth; +}; + +extern int __ceph_setattr(struct inode *inode, struct iattr *attr, + struct ceph_iattr *cia); extern int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct mnt_idmap *idmap, diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 49586ff26152..45f8ce61e103 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -359,14 +359,19 @@ enum { extern const char *ceph_mds_op_name(int op); - -#define CEPH_SETATTR_MODE 1 -#define CEPH_SETATTR_UID 2 -#define CEPH_SETATTR_GID 4 -#define CEPH_SETATTR_MTIME 8 -#define CEPH_SETATTR_ATIME 16 -#define CEPH_SETATTR_SIZE 32 -#define CEPH_SETATTR_CTIME 64 +#define CEPH_SETATTR_MODE (1 << 0) +#define CEPH_SETATTR_UID (1 << 1) +#define CEPH_SETATTR_GID (1 << 2) +#define CEPH_SETATTR_MTIME (1 << 3) +#define CEPH_SETATTR_ATIME (1 << 4) +#define CEPH_SETATTR_SIZE (1 << 5) +#define CEPH_SETATTR_CTIME (1 << 6) +#define CEPH_SETATTR_MTIME_NOW (1 << 7) +#define CEPH_SETATTR_ATIME_NOW (1 << 8) +#define CEPH_SETATTR_BTIME (1 << 9) +#define CEPH_SETATTR_KILL_SGUID (1 << 10) +#define CEPH_SETATTR_FSCRYPT_AUTH (1 << 11) +#define 
CEPH_SETATTR_FSCRYPT_FILE (1 << 12) /* * Ceph setxattr request flags. -- cgit v1.2.3 From 69dd3b3930f96b624228000921f417fb0919a6ab Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 25 Aug 2022 09:31:05 -0400 Subject: libceph: add CEPH_OSD_OP_ASSERT_VER support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ...and record the user_version in the reply in a new field in ceph_osd_request, so we can populate the assert_ver appropriately. Shuffle the fields a bit too so that the new field fits in an existing hole on x86_64. Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Reviewed-and-tested-by: Luís Henriques Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 6 +++++- include/linux/ceph/rados.h | 4 ++++ net/ceph/osd_client.c | 5 +++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 8f5d2b5bbba2..bf9823956758 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -198,6 +198,9 @@ struct ceph_osd_req_op { u32 src_fadvise_flags; struct ceph_osd_data osd_data; } copy_from; + struct { + u64 ver; + } assert_ver; }; }; @@ -252,6 +255,7 @@ struct ceph_osd_request { struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; + bool r_linger; /* don't resend on failure */ struct completion r_completion; /* private to osd_client.c */ ceph_osdc_callback_t r_callback; @@ -264,9 +268,9 @@ struct ceph_osd_request { struct ceph_snap_context *r_snapc; /* for writes */ struct timespec64 r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ - bool r_linger; /* don't resend on failure */ /* internal */ + u64 r_version; /* data version sent in reply */ unsigned long r_stamp; /* jiffies, send or check time */ unsigned long r_start_stamp; /* jiffies */ ktime_t r_start_latency; /* ktime_t */ diff --git a/include/linux/ceph/rados.h 
b/include/linux/ceph/rados.h index 43a7a1573b51..73c3efbec36c 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -523,6 +523,10 @@ struct ceph_osd_op { struct { __le64 cookie; } __attribute__ ((packed)) notify; + struct { + __le64 unused; + __le64 ver; + } __attribute__ ((packed)) assert_ver; struct { __le64 offset, length; __le64 src_offset; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 684faf8553de..7f159e40cf9c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1048,6 +1048,10 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, dst->copy_from.src_fadvise_flags = cpu_to_le32(src->copy_from.src_fadvise_flags); break; + case CEPH_OSD_OP_ASSERT_VER: + dst->assert_ver.unused = cpu_to_le64(0); + dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver); + break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); @@ -3859,6 +3863,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) * one (type of) reply back. */ WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK)); + req->r_version = m.user_version; req->r_result = m.result ?: data_len; finish_request(req); mutex_unlock(&osd->lock); -- cgit v1.2.3 From 781589e40ac5f929f58824c15448e1ba49c3ac32 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 17 Aug 2023 15:55:31 -0700 Subject: rtc: Add support for limited alarm timer offsets Some alarm timers are based on time offsets, not on absolute times. In some situations, the amount of time that can be scheduled in the future is limited. This may result in a refusal to suspend the system, causing substantial battery drain. Some RTC alarm drivers remedy the situation by setting the alarm time to the maximum supported time if a request for an out-of-range timeout is made. This is not really desirable since it may result in unexpected early wakeups. To reduce the impact of this problem, let RTC drivers report the maximum supported alarm timer offset. 
The code setting alarm timers can then decide if it wants to reject setting alarm timers to a larger value, if it wants to implement recurring alarms until the actually requested alarm time is met, or if it wants to accept the limited alarm time. Only introduce the necessary variable into struct rtc_device. Code to set and use the variable will follow with subsequent patches. Cc: Brian Norris Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20230817225537.4053865-2-linux@roeck-us.net Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 1fd9c6a21ebe..4c0bcbeb1f00 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -146,6 +146,7 @@ struct rtc_device { time64_t range_min; timeu64_t range_max; + timeu64_t alarm_offset_max; time64_t start_secs; time64_t offset_secs; bool set_start_time; -- cgit v1.2.3 From 35d8dbbb25add265a880ab0dc48a229f06b08325 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 Aug 2023 20:46:31 +0200 Subject: thermal: core: Drop unused .get_trip_*() callbacks After recent changes in the ACPI thermal driver and in the Intel DTS IOSF thermal driver, all thermal zone drivers are expected to use trip tables for initialization and none of them should implement .get_trip_type(), .get_trip_temp() or .get_trip_hyst() callbacks, so drop these callbacks entirely from the core. Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/thermal_core.c | 2 +- drivers/thermal/thermal_trip.c | 24 +++--------------------- include/linux/thermal.h | 4 ---- 3 files changed, 4 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index a59700593d32..0bdde1ab5d8b 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1266,7 +1266,7 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t return ERR_PTR(-EINVAL); } - if (num_trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp) && !trips) + if (num_trips > 0 && !trips) return ERR_PTR(-EINVAL); if (!thermal_class) diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index 53115cfdfd42..024e2e365a26 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -101,29 +101,11 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz) int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, struct thermal_trip *trip) { - int ret; - - if (!tz || trip_id < 0 || trip_id >= tz->num_trips || !trip) + if (!tz || !tz->trips || trip_id < 0 || trip_id >= tz->num_trips || !trip) return -EINVAL; - if (tz->trips) { - *trip = tz->trips[trip_id]; - return 0; - } - - if (tz->ops->get_trip_hyst) { - ret = tz->ops->get_trip_hyst(tz, trip_id, &trip->hysteresis); - if (ret) - return ret; - } else { - trip->hysteresis = 0; - } - - ret = tz->ops->get_trip_temp(tz, trip_id, &trip->temperature); - if (ret) - return ret; - - return tz->ops->get_trip_type(tz, trip_id, &trip->type); + *trip = tz->trips[trip_id]; + return 0; } EXPORT_SYMBOL_GPL(__thermal_zone_get_trip); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index b449a46766f5..e6bd3b7c9eda 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -62,11 +62,7 @@ struct thermal_zone_device_ops { int (*set_trips) (struct thermal_zone_device *, int, int); int 
(*change_mode) (struct thermal_zone_device *, enum thermal_device_mode); - int (*get_trip_type) (struct thermal_zone_device *, int, - enum thermal_trip_type *); - int (*get_trip_temp) (struct thermal_zone_device *, int, int *); int (*set_trip_temp) (struct thermal_zone_device *, int, int); - int (*get_trip_hyst) (struct thermal_zone_device *, int, int *); int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); -- cgit v1.2.3 From 8289d810ea85755a9d22f75785806cb34eecd5e5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 22 Aug 2023 13:40:06 +0200 Subject: thermal: core: Rework .get_trend() thermal zone callback Passing a struct thermal_trip pointer instead of a trip index to the .get_trend() thermal zone callback allows one of its 2 implementations, the thermal_get_trend() function in the ACPI thermal driver, to be simplified quite a bit, and the other implementation of it in the ti-soc-thermal driver does not even use the relevant callback argument. For this reason, change the .get_trend() thermal zone callback definition and adjust the related code accordingly. Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/thermal.c | 41 ++++++++++------------ drivers/thermal/thermal_core.h | 2 +- drivers/thermal/thermal_helpers.c | 3 +- drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 3 +- include/linux/thermal.h | 30 ++++++++-------- 5 files changed, 38 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 419590f41ed5..f14e68266ccd 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -492,26 +492,22 @@ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp) } static int thermal_get_trend(struct thermal_zone_device *thermal, - int trip_index, enum thermal_trend *trend) + struct thermal_trip *trip, + enum thermal_trend *trend) { struct acpi_thermal *tz = thermal_zone_device_priv(thermal); struct acpi_thermal_trip *acpi_trip; - int t, i; + int t; - if (!tz || trip_index < 0) + if (!tz || !trip) return -EINVAL; - if (tz->trips.critical.valid) - trip_index--; - - if (tz->trips.hot.valid) - trip_index--; - - if (trip_index < 0) + acpi_trip = trip->priv; + if (!acpi_trip || !acpi_trip->valid) return -EINVAL; - acpi_trip = &tz->trips.passive.trip; - if (acpi_trip->valid && !trip_index--) { + switch (trip->type) { + case THERMAL_TRIP_PASSIVE: t = tz->trips.passive.tc1 * (tz->temperature - tz->last_temperature) + tz->trips.passive.tc2 * (tz->temperature - @@ -524,19 +520,18 @@ static int thermal_get_trend(struct thermal_zone_device *thermal, *trend = THERMAL_TREND_STABLE; return 0; - } - - t = acpi_thermal_temp(tz, tz->temperature); - for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) { - acpi_trip = &tz->trips.active[i].trip; - if (acpi_trip->valid && !trip_index--) { - if (t > acpi_thermal_temp(tz, acpi_trip->temperature)) { - *trend = THERMAL_TREND_RAISING; - return 0; - } + case THERMAL_TRIP_ACTIVE: + t = acpi_thermal_temp(tz, tz->temperature); + if (t <= trip->temperature) break; - } + + *trend = THERMAL_TREND_RAISING; + + return 0; + + default: + break; } 
return -EINVAL; diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 04513f9fbfa1..de884bea28b6 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -70,7 +70,7 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) void thermal_cdev_update(struct thermal_cooling_device *); void __thermal_cdev_update(struct thermal_cooling_device *cdev); -int get_tz_trend(struct thermal_zone_device *tz, int trip); +int get_tz_trend(struct thermal_zone_device *tz, int trip_index); struct thermal_instance * get_thermal_instance(struct thermal_zone_device *tz, diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c index cfba0965a22d..4d66372c9629 100644 --- a/drivers/thermal/thermal_helpers.c +++ b/drivers/thermal/thermal_helpers.c @@ -22,8 +22,9 @@ #include "thermal_core.h" #include "thermal_trace.h" -int get_tz_trend(struct thermal_zone_device *tz, int trip) +int get_tz_trend(struct thermal_zone_device *tz, int trip_index) { + struct thermal_trip *trip = tz->trips ? 
&tz->trips[trip_index] : NULL; enum thermal_trend trend; if (tz->emul_temperature || !tz->ops->get_trend || diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index d414a4b7a94a..6ba2613627e1 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -109,7 +109,8 @@ static inline int __ti_thermal_get_temp(struct thermal_zone_device *tz, int *tem return ret; } -static int __ti_thermal_get_trend(struct thermal_zone_device *tz, int trip, enum thermal_trend *trend) +static int __ti_thermal_get_trend(struct thermal_zone_device *tz, + struct thermal_trip *trip, enum thermal_trend *trend) { struct ti_thermal_data *data = thermal_zone_device_priv(tz); struct ti_bandgap *bgp; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e6bd3b7c9eda..eb17495c8acc 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -53,6 +53,20 @@ enum thermal_notify_event { THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */ }; +/** + * struct thermal_trip - representation of a point in temperature domain + * @temperature: temperature value in miliCelsius + * @hysteresis: relative hysteresis in miliCelsius + * @type: trip point type + * @priv: pointer to driver data associated with this trip + */ +struct thermal_trip { + int temperature; + int hysteresis; + enum thermal_trip_type type; + void *priv; +}; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); @@ -66,26 +80,12 @@ struct thermal_zone_device_ops { int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); - int (*get_trend) (struct thermal_zone_device *, int, + int (*get_trend) (struct thermal_zone_device *, struct thermal_trip *, enum thermal_trend *); void (*hot)(struct 
thermal_zone_device *); void (*critical)(struct thermal_zone_device *); }; -/** - * struct thermal_trip - representation of a point in temperature domain - * @temperature: temperature value in miliCelsius - * @hysteresis: relative hysteresis in miliCelsius - * @type: trip point type - * @priv: pointer to driver data associated with this trip - */ -struct thermal_trip { - int temperature; - int hysteresis; - enum thermal_trip_type type; - void *priv; -}; - struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); -- cgit v1.2.3 From 218a06a79d9a98a96ef46bb003d4d8adb0962056 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Tue, 22 Aug 2023 20:48:37 +0800 Subject: cpufreq: Support per-policy performance boost The boost control currently applies to the whole system. However, users may prefer to boost a subset of cores in order to provide prioritized performance to workloads running on the boosted cores. Enable per-policy boost by adding a 'boost' sysfs interface under each policy path. This can be found at: /sys/devices/system/cpu/cpufreq/policy<*>/boost Same to the global boost switch, writing 1/0 to the per-policy 'boost' enables/disables boost on a cpufreq policy respectively. The user view of global and per-policy boost controls should be: 1. Enabling global boost initially enables boost on all policies, and per-policy boost can then be enabled or disabled individually, given that the platform does support so. 2. Disabling global boost makes the per-policy boost interface illegal. Signed-off-by: Jie Zhan Reviewed-by: Wei Xu Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/cpufreq.h | 3 +++ 2 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d9eb036245c3..60ed89000e82 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -86,6 +86,7 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy); static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_governor *new_gov, unsigned int new_pol); +static bool cpufreq_boost_supported(void); /* * Two notifier lists: the "policy" list is involved in the @@ -623,6 +624,40 @@ static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, } define_one_global_rw(boost); +static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf) +{ + return sysfs_emit(buf, "%d\n", policy->boost_enabled); +} + +static ssize_t store_local_boost(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + int ret, enable; + + ret = kstrtoint(buf, 10, &enable); + if (ret || enable < 0 || enable > 1) + return -EINVAL; + + if (!cpufreq_driver->boost_enabled) + return -EINVAL; + + if (policy->boost_enabled == enable) + return count; + + cpus_read_lock(); + ret = cpufreq_driver->set_boost(policy, enable); + cpus_read_unlock(); + + if (ret) + return ret; + + policy->boost_enabled = enable; + + return count; +} + +static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost); + static struct cpufreq_governor *find_governor(const char *str_governor) { struct cpufreq_governor *t; @@ -1057,6 +1092,12 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return ret; } + if (cpufreq_boost_supported()) { + ret = sysfs_create_file(&policy->kobj, &local_boost.attr); + if (ret) + return ret; + } + return 0; } @@ -2718,6 +2759,8 @@ int cpufreq_boost_trigger_state(int state) ret = cpufreq_driver->set_boost(policy, 
state); if (ret) goto err_reset_state; + + policy->boost_enabled = state; } cpus_read_unlock(); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 43b363a99215..71d186d6933a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -141,6 +141,9 @@ struct cpufreq_policy { */ bool dvfs_possible_from_any_cpu; + /* Per policy boost enabled flag. */ + bool boost_enabled; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; unsigned int cached_resolved_idx; -- cgit v1.2.3 From 7e9be1124dbe7888907e82cab20164578e3f9ab7 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 29 Aug 2023 19:51:57 +0200 Subject: netfilter: nf_tables: Audit log setelem reset Since set element reset is not integrated into nf_tables' transaction logic, an explicit log call is needed, similar to NFT_MSG_GETOBJ_RESET handling. For the sake of simplicity, catchall element reset will always generate a dedicated log entry. This relieves nf_tables_dump_set() from having to adjust the logged element count depending on whether a catchall element was found or not. 
Fixes: 079cd633219d7 ("netfilter: nf_tables: Introduce NFT_MSG_GETSETELEM_RESET") Signed-off-by: Phil Sutter Reviewed-by: Richard Guy Briggs Signed-off-by: Pablo Neira Ayuso --- include/linux/audit.h | 1 + kernel/auditsc.c | 1 + net/netfilter/nf_tables_api.c | 31 ++++++++++++++++++++++++++++--- 3 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 6a3a9e122bb5..192bf03aacc5 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -117,6 +117,7 @@ enum audit_nfcfgop { AUDIT_NFT_OP_OBJ_RESET, AUDIT_NFT_OP_FLOWTABLE_REGISTER, AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, + AUDIT_NFT_OP_SETELEM_RESET, AUDIT_NFT_OP_INVALID, }; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index addeed3df15d..38481e318197 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -143,6 +143,7 @@ static const struct audit_nfcfgop_tab audit_nfcfgs[] = { { AUDIT_NFT_OP_OBJ_RESET, "nft_reset_obj" }, { AUDIT_NFT_OP_FLOWTABLE_REGISTER, "nft_register_flowtable" }, { AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, "nft_unregister_flowtable" }, + { AUDIT_NFT_OP_SETELEM_RESET, "nft_reset_setelem" }, { AUDIT_NFT_OP_INVALID, "nft_invalid" }, }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 41b826dff6f5..361e98e71692 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -102,6 +102,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types [NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER, [NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID, [NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, + [NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET, }; static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state) @@ -5624,13 +5625,25 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, return nf_tables_fill_setelem(args->skb, set, elem, args->reset); } +static void audit_log_nft_set_reset(const 
struct nft_table *table, + unsigned int base_seq, + unsigned int nentries) +{ + char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq); + + audit_log_nfcfg(buf, table->family, nentries, + AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC); + kfree(buf); +} + struct nft_set_dump_ctx { const struct nft_set *set; struct nft_ctx ctx; }; static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, - const struct nft_set *set, bool reset) + const struct nft_set *set, bool reset, + unsigned int base_seq) { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_cur(net); @@ -5646,6 +5659,8 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, elem.priv = catchall->elem; ret = nf_tables_fill_setelem(skb, set, &elem, reset); + if (reset && !ret) + audit_log_nft_set_reset(set->table, base_seq, 1); break; } @@ -5725,12 +5740,17 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) set->ops->walk(&dump_ctx->ctx, set, &args.iter); if (!args.iter.err && args.iter.count == cb->args[0]) - args.iter.err = nft_set_catchall_dump(net, skb, set, reset); + args.iter.err = nft_set_catchall_dump(net, skb, set, + reset, cb->seq); rcu_read_unlock(); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); + if (reset && args.iter.count > args.iter.skip) + audit_log_nft_set_reset(table, cb->seq, + args.iter.count - args.iter.skip); + if (args.iter.err && args.iter.err != -EMSGSIZE) return args.iter.err; if (args.iter.count == cb->args[0]) @@ -5955,13 +5975,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb, struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; + int rem, err = 0, nelems = 0; struct net *net = info->net; struct nft_table *table; struct nft_set *set; struct nlattr *attr; struct nft_ctx ctx; bool reset = false; - int rem, err = 0; table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, genmask, 0); @@ -6004,8 +6024,13 @@ static 
int nf_tables_getsetelem(struct sk_buff *skb, NL_SET_BAD_ATTR(extack, attr); break; } + nelems++; } + if (reset) + audit_log_nft_set_reset(table, nft_pernet(net)->base_seq, + nelems); + return err; } -- cgit v1.2.3 From ea078ae9108e25fc881c84369f7c03931d22e555 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 29 Aug 2023 19:51:58 +0200 Subject: netfilter: nf_tables: Audit log rule reset Resetting rules' stateful data happens outside of the transaction logic, so 'get' and 'dump' handlers have to emit audit log entries themselves. Fixes: 8daa8fde3fc3f ("netfilter: nf_tables: Introduce NFT_MSG_GETRULE_RESET") Signed-off-by: Phil Sutter Reviewed-by: Richard Guy Briggs Signed-off-by: Pablo Neira Ayuso --- include/linux/audit.h | 1 + kernel/auditsc.c | 1 + net/netfilter/nf_tables_api.c | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 192bf03aacc5..51b1b7054a23 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -118,6 +118,7 @@ enum audit_nfcfgop { AUDIT_NFT_OP_FLOWTABLE_REGISTER, AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, AUDIT_NFT_OP_SETELEM_RESET, + AUDIT_NFT_OP_RULE_RESET, AUDIT_NFT_OP_INVALID, }; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 38481e318197..fc0c7c03eeab 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -144,6 +144,7 @@ static const struct audit_nfcfgop_tab audit_nfcfgs[] = { { AUDIT_NFT_OP_FLOWTABLE_REGISTER, "nft_register_flowtable" }, { AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, "nft_unregister_flowtable" }, { AUDIT_NFT_OP_SETELEM_RESET, "nft_reset_setelem" }, + { AUDIT_NFT_OP_RULE_RESET, "nft_reset_rule" }, { AUDIT_NFT_OP_INVALID, "nft_invalid" }, }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 361e98e71692..2c81cee858d6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3422,6 +3422,18 @@ err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); 
} +static void audit_log_rule_reset(const struct nft_table *table, + unsigned int base_seq, + unsigned int nentries) +{ + char *buf = kasprintf(GFP_ATOMIC, "%s:%u", + table->name, base_seq); + + audit_log_nfcfg(buf, table->family, nentries, + AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC); + kfree(buf); +} + struct nft_rule_dump_ctx { char *table; char *chain; @@ -3528,6 +3540,9 @@ static int nf_tables_dump_rules(struct sk_buff *skb, done: rcu_read_unlock(); + if (reset && idx > cb->args[0]) + audit_log_rule_reset(table, cb->seq, idx - cb->args[0]); + cb->args[0] = idx; return skb->len; } @@ -3635,6 +3650,9 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, if (err < 0) goto err_fill_rule_info; + if (reset) + audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1); + return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); err_fill_rule_info: -- cgit v1.2.3 From 3af5ae22030cb59fab4fba35f5a2b62f47e14df9 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 25 Jul 2023 09:44:40 +0800 Subject: ceph: make members in struct ceph_mds_request_args_ext a union In ceph mainline it will allow to set the btime in the setattr request and just add a 'btime' member in the union 'ceph_mds_request_args' and then bump up the header version to 4. That means the total size of union 'ceph_mds_request_args' will increase sizeof(struct ceph_timespec) bytes, but in kclient it will increase the sizeof(setattr_ext) bytes for each request. Since the MDS will always depend on the header's version and front_len members to decode the 'ceph_mds_request_head' struct, at the same time kclient hasn't supported the 'btime' feature yet in setattr request, so it's safe to do this change here. This will save 48 bytes of memory for each request.
Fixes: 4f1ddb1ea874 ("ceph: implement updated ceph_mds_request_head structure") Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 45f8ce61e103..ae44812eb6d4 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -467,17 +467,19 @@ union ceph_mds_request_args { } __attribute__ ((packed)); union ceph_mds_request_args_ext { - union ceph_mds_request_args old; - struct { - __le32 mode; - __le32 uid; - __le32 gid; - struct ceph_timespec mtime; - struct ceph_timespec atime; - __le64 size, old_size; /* old_size needed by truncate */ - __le32 mask; /* CEPH_SETATTR_* */ - struct ceph_timespec btime; - } __attribute__ ((packed)) setattr_ext; + union { + union ceph_mds_request_args old; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + struct ceph_timespec btime; + } __attribute__ ((packed)) setattr_ext; + }; }; #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ -- cgit v1.2.3 From ce0d5bd3a6c176f9a3bf867624a07119dd4d0878 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 25 Jul 2023 17:51:59 +0800 Subject: ceph: make num_fwd and num_retry to __u32 The num_fwd in MClientRequestForward is int32_t, while the num_fwd in ceph_mds_request_head is __u8. This is buggy when the num_fwd is larger than 256 it will always be truncated to 0 again. But the client couldn't recognize this. This will make them to __u32 instead. Because the old cephs will directly copy the raw memories when decoding the request's head, so we need to make sure this kclient will be compatible with old cephs.
For newer cephs they will decode the requests depending the version, which will be much simpler and easier to extend new members. Link: https://tracker.ceph.com/issues/62145 Signed-off-by: Xiubo Li Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 189 +++++++++++++++++++++++-------------------- fs/ceph/mds_client.h | 4 +- include/linux/ceph/ceph_fs.h | 23 +++++- 3 files changed, 126 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 04a881343e43..615db141b6c4 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2840,6 +2840,18 @@ static void encode_mclientrequest_tail(void **p, } } +static struct ceph_mds_request_head_legacy * +find_legacy_request_head(void *p, u64 features) +{ + bool legacy = !(features & CEPH_FEATURE_FS_BTIME); + struct ceph_mds_request_head_old *ohead; + + if (legacy) + return (struct ceph_mds_request_head_legacy *)p; + ohead = (struct ceph_mds_request_head_old *)p; + return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid; +} + /* * called under mdsc->mutex */ @@ -2850,7 +2862,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, int mds = session->s_mds; struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_msg *msg; - struct ceph_mds_request_head_old *head; + struct ceph_mds_request_head_legacy *lhead; const char *path1 = NULL; const char *path2 = NULL; u64 ino1 = 0, ino2 = 0; @@ -2862,6 +2874,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, void *p, *end; int ret; bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); + bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, + &session->s_features); ret = set_request_path_attr(req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, @@ -2893,7 +2907,19 @@ static struct ceph_msg *create_request_message(struct 
ceph_mds_session *session, goto out_free2; } - len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head); + /* + * For old cephs without supporting the 32bit retry/fwd feature + * it will copy the raw memories directly when decoding the + * requests. While new cephs will decode the head depending the + * version member, so we need to make sure it will be compatible + * with them both. + */ + if (legacy) + len = sizeof(struct ceph_mds_request_head_legacy); + else if (old_version) + len = sizeof(struct ceph_mds_request_head_old); + else + len = sizeof(struct ceph_mds_request_head); /* filepaths */ len += 2 * (1 + sizeof(u32) + sizeof(u64)); @@ -2938,33 +2964,40 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->hdr.tid = cpu_to_le64(req->r_tid); + lhead = find_legacy_request_head(msg->front.iov_base, + session->s_con.peer_features); + /* - * The old ceph_mds_request_head didn't contain a version field, and + * The ceph_mds_request_head_legacy didn't contain a version field, and * one was added when we moved the message version from 3->4. 
*/ if (legacy) { msg->hdr.version = cpu_to_le16(3); - head = msg->front.iov_base; - p = msg->front.iov_base + sizeof(*head); + p = msg->front.iov_base + sizeof(*lhead); + } else if (old_version) { + struct ceph_mds_request_head_old *ohead = msg->front.iov_base; + + msg->hdr.version = cpu_to_le16(4); + ohead->version = cpu_to_le16(1); + p = msg->front.iov_base + sizeof(*ohead); } else { - struct ceph_mds_request_head *new_head = msg->front.iov_base; + struct ceph_mds_request_head *nhead = msg->front.iov_base; msg->hdr.version = cpu_to_le16(6); - new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); - head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid; - p = msg->front.iov_base + sizeof(*new_head); + nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); + p = msg->front.iov_base + sizeof(*nhead); } end = msg->front.iov_base + msg->front.iov_len; - head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); - head->op = cpu_to_le32(req->r_op); - head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, - req->r_cred->fsuid)); - head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, - req->r_cred->fsgid)); - head->ino = cpu_to_le64(req->r_deleg_ino); - head->args = req->r_args; + lhead->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); + lhead->op = cpu_to_le32(req->r_op); + lhead->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, + req->r_cred->fsuid)); + lhead->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, + req->r_cred->fsgid)); + lhead->ino = cpu_to_le64(req->r_deleg_ino); + lhead->args = req->r_args; ceph_encode_filepath(&p, end, ino1, path1); ceph_encode_filepath(&p, end, ino2, path2); @@ -3006,7 +3039,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, p = msg->front.iov_base + req->r_request_release_offset; } - head->num_releases = cpu_to_le16(releases); + lhead->num_releases = cpu_to_le16(releases); encode_mclientrequest_tail(&p, req); @@ -3057,18 +3090,6 @@ static void 
complete_request(struct ceph_mds_client *mdsc, complete_all(&req->r_completion); } -static struct ceph_mds_request_head_old * -find_old_request_head(void *p, u64 features) -{ - bool legacy = !(features & CEPH_FEATURE_FS_BTIME); - struct ceph_mds_request_head *new_head; - - if (legacy) - return (struct ceph_mds_request_head_old *)p; - new_head = (struct ceph_mds_request_head *)p; - return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid; -} - /* * called under mdsc->mutex */ @@ -3078,29 +3099,28 @@ static int __prepare_send_request(struct ceph_mds_session *session, { int mds = session->s_mds; struct ceph_mds_client *mdsc = session->s_mdsc; - struct ceph_mds_request_head_old *rhead; + struct ceph_mds_request_head_legacy *lhead; + struct ceph_mds_request_head *nhead; struct ceph_msg *msg; - int flags = 0, max_retry; + int flags = 0, old_max_retry; + bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, + &session->s_features); /* - * The type of 'r_attempts' in kernel 'ceph_mds_request' - * is 'int', while in 'ceph_mds_request_head' the type of - * 'num_retry' is '__u8'. So in case the request retries - * exceeding 256 times, the MDS will receive a incorrect - * retry seq. - * - * In this case it's ususally a bug in MDS and continue - * retrying the request makes no sense. - * - * In future this could be fixed in ceph code, so avoid - * using the hardcode here. + * Avoid inifinite retrying after overflow. The client will + * increase the retry count and if the MDS is old version, + * so we limit to retry at most 256 times. 
*/ - max_retry = sizeof_field(struct ceph_mds_request_head, num_retry); - max_retry = 1 << (max_retry * BITS_PER_BYTE); - if (req->r_attempts >= max_retry) { - pr_warn_ratelimited("%s request tid %llu seq overflow\n", - __func__, req->r_tid); - return -EMULTIHOP; + if (req->r_attempts) { + old_max_retry = sizeof_field(struct ceph_mds_request_head_old, + num_retry); + old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE); + if ((old_version && req->r_attempts >= old_max_retry) || + ((uint32_t)req->r_attempts >= U32_MAX)) { + pr_warn_ratelimited("%s request tid %llu seq overflow\n", + __func__, req->r_tid); + return -EMULTIHOP; + } } req->r_attempts++; @@ -3126,20 +3146,24 @@ static int __prepare_send_request(struct ceph_mds_session *session, * d_move mangles the src name. */ msg = req->r_request; - rhead = find_old_request_head(msg->front.iov_base, - session->s_con.peer_features); + lhead = find_legacy_request_head(msg->front.iov_base, + session->s_con.peer_features); - flags = le32_to_cpu(rhead->flags); + flags = le32_to_cpu(lhead->flags); flags |= CEPH_MDS_FLAG_REPLAY; - rhead->flags = cpu_to_le32(flags); + lhead->flags = cpu_to_le32(flags); if (req->r_target_inode) - rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode)); + lhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode)); - rhead->num_retry = req->r_attempts - 1; + lhead->num_retry = req->r_attempts - 1; + if (!old_version) { + nhead = (struct ceph_mds_request_head*)msg->front.iov_base; + nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1); + } /* remove cap/dentry releases from message */ - rhead->num_releases = 0; + lhead->num_releases = 0; p = msg->front.iov_base + req->r_request_release_offset; encode_mclientrequest_tail(&p, req); @@ -3160,18 +3184,23 @@ static int __prepare_send_request(struct ceph_mds_session *session, } req->r_request = msg; - rhead = find_old_request_head(msg->front.iov_base, - session->s_con.peer_features); - rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc)); + 
lhead = find_legacy_request_head(msg->front.iov_base, + session->s_con.peer_features); + lhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc)); if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) flags |= CEPH_MDS_FLAG_REPLAY; if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) flags |= CEPH_MDS_FLAG_ASYNC; if (req->r_parent) flags |= CEPH_MDS_FLAG_WANT_DENTRY; - rhead->flags = cpu_to_le32(flags); - rhead->num_fwd = req->r_num_fwd; - rhead->num_retry = req->r_attempts - 1; + lhead->flags = cpu_to_le32(flags); + lhead->num_fwd = req->r_num_fwd; + lhead->num_retry = req->r_attempts - 1; + if (!old_version) { + nhead = (struct ceph_mds_request_head*)msg->front.iov_base; + nhead->ext_num_fwd = cpu_to_le32(req->r_num_fwd); + nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1); + } dout(" r_parent = %p\n", req->r_parent); return 0; @@ -3830,33 +3859,21 @@ static void handle_forward(struct ceph_mds_client *mdsc, if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { dout("forward tid %llu aborted, unregistering\n", tid); __unregister_request(mdsc, req); - } else if (fwd_seq <= req->r_num_fwd) { + } else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) { /* - * The type of 'num_fwd' in ceph 'MClientRequestForward' - * is 'int32_t', while in 'ceph_mds_request_head' the - * type is '__u8'. So in case the request bounces between - * MDSes exceeding 256 times, the client will get stuck. - * - * In this case it's ususally a bug in MDS and continue - * bouncing the request makes no sense. + * Avoid inifinite retrying after overflow. * - * In future this could be fixed in ceph code, so avoid - * using the hardcode here. + * The MDS will increase the fwd count and in client side + * if the num_fwd is less than the one saved in request + * that means the MDS is an old version and overflowed of + * 8 bits. 
*/ - int max = sizeof_field(struct ceph_mds_request_head, num_fwd); - max = 1 << (max * BITS_PER_BYTE); - if (req->r_num_fwd >= max) { - mutex_lock(&req->r_fill_mutex); - req->r_err = -EMULTIHOP; - set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); - mutex_unlock(&req->r_fill_mutex); - aborted = true; - pr_warn_ratelimited("forward tid %llu seq overflow\n", - tid); - } else { - dout("forward tid %llu to mds%d - old seq %d <= %d\n", - tid, next_mds, req->r_num_fwd, fwd_seq); - } + mutex_lock(&req->r_fill_mutex); + req->r_err = -EMULTIHOP; + set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); + mutex_unlock(&req->r_fill_mutex); + aborted = true; + pr_warn_ratelimited("forward tid %llu seq overflow\n", tid); } else { /* resend. forward race not possible; mds would drop */ dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 1fa0f78b7b79..5a3714bdd64a 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -32,8 +32,9 @@ enum ceph_feature_type { CEPHFS_FEATURE_ALTERNATE_NAME, CEPHFS_FEATURE_NOTIFY_SESSION_STATE, CEPHFS_FEATURE_OP_GETVXATTR, + CEPHFS_FEATURE_32BITS_RETRY_FWD, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_OP_GETVXATTR, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_32BITS_RETRY_FWD, }; #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ @@ -47,6 +48,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_ALTERNATE_NAME, \ CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \ CEPHFS_FEATURE_OP_GETVXATTR, \ + CEPHFS_FEATURE_32BITS_RETRY_FWD, \ } /* diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ae44812eb6d4..5f2301ee88bc 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -486,7 +486,7 @@ union ceph_mds_request_args_ext { #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ #define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */ -struct ceph_mds_request_head_old { +struct ceph_mds_request_head_legacy { __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on 
client */ __le32 flags; /* CEPH_MDS_FLAG_* */ @@ -499,9 +499,9 @@ struct ceph_mds_request_head_old { union ceph_mds_request_args args; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 1 +#define CEPH_MDS_REQUEST_HEAD_VERSION 2 -struct ceph_mds_request_head { +struct ceph_mds_request_head_old { __le16 version; /* struct version */ __le64 oldest_client_tid; __le32 mdsmap_epoch; /* on client */ @@ -515,6 +515,23 @@ struct ceph_mds_request_head { union ceph_mds_request_args_ext args; } __attribute__ ((packed)); +struct ceph_mds_request_head { + __le16 version; /* struct version */ + __le64 oldest_client_tid; + __le32 mdsmap_epoch; /* on client */ + __le32 flags; /* CEPH_MDS_FLAG_* */ + __u8 num_retry, num_fwd; /* legacy count retry and fwd attempts */ + __le16 num_releases; /* # include cap/lease release records */ + __le32 op; /* mds op code */ + __le32 caller_uid, caller_gid; + __le64 ino; /* use this ino for openc, mkdir, mknod, + etc. (if replaying) */ + union ceph_mds_request_args_ext args; + + __le32 ext_num_retry; /* new count retry attempts */ + __le32 ext_num_fwd; /* new count fwd attempts */ +} __attribute__ ((packed)); + /* cap/lease release record */ struct ceph_mds_request_release { __le64 ino, cap_id; /* ino and unique cap id */ -- cgit v1.2.3 From 52e322eda3d475614210efbc0f2793a1da9d367a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 28 Jul 2023 17:47:22 -0700 Subject: KVM: x86/mmu: BUG() in rmap helpers iff CONFIG_BUG_ON_DATA_CORRUPTION=y Introduce KVM_BUG_ON_DATA_CORRUPTION() and use it in the low-level rmap helpers to convert the existing BUG()s to WARN_ON_ONCE() when the kernel is built with CONFIG_BUG_ON_DATA_CORRUPTION=n, i.e. does NOT want to BUG() on corruption of host kernel data structures. Environments that don't have infrastructure to automatically capture crash dumps, i.e. 
aren't likely to enable CONFIG_BUG_ON_DATA_CORRUPTION=y, are typically better served overall by WARN-and-continue behavior (for the kernel, the VM is dead regardless), as a BUG() while holding mmu_lock all but guarantees the _best_ case scenario is a panic(). Make the BUG()s conditional instead of removing/replacing them entirely as there's a non-zero chance (though by no means a guarantee) that the damage isn't contained to the target VM, e.g. if no rmap is found for a SPTE then KVM may be double-zapping the SPTE, i.e. has already freed the memory the SPTE pointed at and thus KVM is reading/writing memory that KVM no longer owns. Link: https://lore.kernel.org/all/20221129191237.31447-1-mizhang@google.com Suggested-by: Mingwei Zhang Cc: David Matlack Cc: Jim Mattson Reviewed-by: Mingwei Zhang Link: https://lore.kernel.org/r/20230729004722.1056172-13-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 21 ++++++++++----------- include/linux/kvm_host.h | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index f518dd569a14..0420944da242 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -973,7 +973,7 @@ static void pte_list_desc_remove_entry(struct kvm *kvm, * when adding an entry and the previous head is full, and heads are * removed (this flow) when they become empty. 
*/ - BUG_ON(j < 0); + KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm); /* * Replace the to-be-freed SPTE with the last valid entry from the head @@ -1004,14 +1004,13 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte, struct pte_list_desc *desc; int i; - if (!rmap_head->val) { - pr_err("%s: %p 0->BUG\n", __func__, spte); - BUG(); - } else if (!(rmap_head->val & 1)) { - if ((u64 *)rmap_head->val != spte) { - pr_err("%s: %p 1->BUG\n", __func__, spte); - BUG(); - } + if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm)) + return; + + if (!(rmap_head->val & 1)) { + if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm)) + return; + rmap_head->val = 0; } else { desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); @@ -1025,8 +1024,8 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte, } desc = desc->more; } - pr_err("%s: %p many->many\n", __func__, spte); - BUG(); + + KVM_BUG_ON_DATA_CORRUPTION(true, kvm); } } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1b583f35547e..fb6c6109fdca 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -867,6 +867,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm) unlikely(__ret); \ }) +/* + * Note, "data corruption" refers to corruption of host kernel data structures, + * not guest data. Guest data corruption, suspected or confirmed, that is tied + * and contained to a single VM should *never* BUG() and potentially panic the + * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure + * is corrupted and that corruption can have a cascading effect to other parts + * of the hosts and/or to other VMs. 
+ */ +#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \ +({ \ + bool __ret = !!(cond); \ + \ + if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \ + BUG_ON(__ret); \ + else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \ + kvm_vm_bugged(kvm); \ + unlikely(__ret); \ +}) + static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PROVE_RCU -- cgit v1.2.3 From 6a86b5b5cd76d2734304a0173f5f01aa8aa2025e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 29 Aug 2023 22:53:52 +0200 Subject: bpf: Annotate bpf_long_memcpy with data_race syzbot reported a data race splat between two processes trying to update the same BPF map value via syscall on different CPUs: BUG: KCSAN: data-race in bpf_percpu_array_update / bpf_percpu_array_update write to 0xffffe8fffe7425d8 of 8 bytes by task 8257 on cpu 1: bpf_long_memcpy include/linux/bpf.h:428 [inline] bpf_obj_memcpy include/linux/bpf.h:441 [inline] copy_map_value_long include/linux/bpf.h:464 [inline] bpf_percpu_array_update+0x3bb/0x500 kernel/bpf/arraymap.c:380 bpf_map_update_value+0x190/0x370 kernel/bpf/syscall.c:175 generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1749 bpf_map_do_batch+0x2df/0x3d0 kernel/bpf/syscall.c:4648 __sys_bpf+0x28a/0x780 __do_sys_bpf kernel/bpf/syscall.c:5241 [inline] __se_sys_bpf kernel/bpf/syscall.c:5239 [inline] __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5239 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd write to 0xffffe8fffe7425d8 of 8 bytes by task 8268 on cpu 0: bpf_long_memcpy include/linux/bpf.h:428 [inline] bpf_obj_memcpy include/linux/bpf.h:441 [inline] copy_map_value_long include/linux/bpf.h:464 [inline] bpf_percpu_array_update+0x3bb/0x500 kernel/bpf/arraymap.c:380 bpf_map_update_value+0x190/0x370 kernel/bpf/syscall.c:175 generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1749 bpf_map_do_batch+0x2df/0x3d0 kernel/bpf/syscall.c:4648 __sys_bpf+0x28a/0x780 
__do_sys_bpf kernel/bpf/syscall.c:5241 [inline] __se_sys_bpf kernel/bpf/syscall.c:5239 [inline] __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5239 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x0000000000000000 -> 0xfffffff000002788 The bpf_long_memcpy is used with 8-byte aligned pointers, power-of-8 size and forced to use long read/writes to try to atomically copy long counters. It is best-effort only and no barriers are here since it _will_ race with concurrent updates from BPF programs. The bpf_long_memcpy() is called from bpf(2) syscall. Marco suggested that the best way to make this known to KCSAN would be to use data_race() annotation. Reported-by: syzbot+97522333291430dd277f@syzkaller.appspotmail.com Suggested-by: Marco Elver Signed-off-by: Daniel Borkmann Acked-by: Marco Elver Link: https://lore.kernel.org/bpf/000000000000d87a7f06040c970c@google.com Link: https://lore.kernel.org/bpf/57628f7a15e20d502247c3b55fceb1cb2b31f266.1693342186.git.daniel@iogearbox.net --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 12596af59c00..024e8b28c34b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -438,7 +438,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) size /= sizeof(long); while (size--) - *ldst++ = *lsrc++; + data_race(*ldst++ = *lsrc++); } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. 
*/ -- cgit v1.2.3 From 8423be8926aa82cd2e28bba5cc96ccb72c7ce6be Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Thu, 31 Aug 2023 10:03:31 +0200 Subject: ipv6: ignore dst hint for multipath routes Route hints when the nexthop is part of a multipath group causes packets in the same receive batch to be sent to the same nexthop irrespective of the multipath hash of the packet. So, do not extract route hint for packets whose destination is part of a multipath group. A new SKB flag IP6SKB_MULTIPATH is introduced for this purpose, set the flag when route is looked up in fib6_select_path() and use it in ip6_can_use_hint() to check for the existence of the flag. Fixes: 197dbf24e360 ("ipv6: introduce and uses route look hints for list input.") Signed-off-by: Sriram Yagnaraman Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + net/ipv6/ip6_input.c | 3 ++- net/ipv6/route.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5883551b1ee8..af8a771a053c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -147,6 +147,7 @@ struct inet6_skb_parm { #define IP6SKB_JUMBOGRAM 128 #define IP6SKB_SEG6 256 #define IP6SKB_FAKEJUMBO 512 +#define IP6SKB_MULTIPATH 1024 }; #if defined(CONFIG_NET_L3_MASTER_DEV) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index d94041bb4287..b8378814532c 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -99,7 +99,8 @@ static bool ip6_can_use_hint(const struct sk_buff *skb, static struct sk_buff *ip6_extract_route_hint(const struct net *net, struct sk_buff *skb) { - if (fib6_routes_require_src(net) || fib6_has_custom_rules(net)) + if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) || + IP6CB(skb)->flags & IP6SKB_MULTIPATH) return NULL; return skb; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 846aec8e0093..01d6d352850a 100644 --- 
a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -423,6 +423,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, if (match->nh && have_oif_match && res->nh) return; + if (skb) + IP6CB(skb)->flags |= IP6SKB_MULTIPATH; + /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. */ -- cgit v1.2.3 From 6ad11bc6ed37c6371e9e13619862709b6529b43a Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 24 Aug 2023 14:38:08 +0300 Subject: rmap: remove anon_vma_link() nommu stub anon_vma_link() is unused since commit 5beb49305251 ("mm: change anon_vma linking to fix multi-process server scalability issue"). Link: https://lkml.kernel.org/r/cdce9b00c9ab15f6d02eddf40dcad537d3e9676f.1692877089.git.baruch@tkos.co.il Signed-off-by: Baruch Siach Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/rmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a3825ce81102..51cc21ebb568 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -479,7 +479,6 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, #define anon_vma_init() do {} while (0) #define anon_vma_prepare(vma) (0) -#define anon_vma_link(vma) do {} while (0) static inline int folio_referenced(struct folio *folio, int is_locked, struct mem_cgroup *memcg, -- cgit v1.2.3 From 719c5e37e99d2fd588d1c994284d17650a66354c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 1 Sep 2023 06:53:23 +0200 Subject: net: phy: micrel: Correct bit assignments for phy_device flags Previously, the defines for phy_device flags in the Micrel driver were ambiguous in their representation. They were intended to be bit masks but were mistakenly defined as bit positions. This led to the following issues: - MICREL_KSZ8_P1_ERRATA, designated for KSZ88xx switches, overlapped with MICREL_PHY_FXEN and MICREL_PHY_50MHZ_CLK. 
- Due to this overlap, the code path for MICREL_PHY_FXEN, tailored for the KSZ8041 PHY, was not executed for KSZ88xx PHYs. - Similarly, the code associated with MICREL_PHY_50MHZ_CLK wasn't triggered for KSZ88xx. To rectify this, all three flags have now been explicitly converted to use the `BIT()` macro, ensuring they are defined as bit masks and preventing potential overlaps in the future. Fixes: 49011e0c1555 ("net: phy: micrel: ksz886x/ksz8081: add cabletest support") Signed-off-by: Oleksij Rempel Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 8bef1ab62bba..322d87255984 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -41,9 +41,9 @@ #define PHY_ID_KSZ9477 0x00221631 /* struct phy_device dev_flags definitions */ -#define MICREL_PHY_50MHZ_CLK 0x00000001 -#define MICREL_PHY_FXEN 0x00000002 -#define MICREL_KSZ8_P1_ERRATA 0x00000003 +#define MICREL_PHY_50MHZ_CLK BIT(0) +#define MICREL_PHY_FXEN BIT(1) +#define MICREL_KSZ8_P1_ERRATA BIT(2) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC -- cgit v1.2.3 From 0be7592885d7b4c20595c388adc13930b653b847 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 31 Aug 2023 16:51:45 +0530 Subject: scsi: qla2xxx: Correct endianness for rqstlen and rsplen rqstlen and rsplen were changed to __le32 to fix sparse warnings: drivers/scsi/qla2xxx/qla_nvme.c:402:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:402:30: expected restricted __le32 [usertype] cmd_len drivers/scsi/qla2xxx/qla_nvme.c:402:30: got unsigned short [usertype] rsplen drivers/scsi/qla2xxx/qla_nvme.c:507:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:507:30: expected restricted __le32 [usertype] cmd_len 
drivers/scsi/qla2xxx/qla_nvme.c:507:30: got unsigned int [usertype] rqstlen drivers/scsi/qla2xxx/qla_nvme.c:508:30: warning: incorrect type in assignment (different base types) drivers/scsi/qla2xxx/qla_nvme.c:508:30: expected restricted __le32 [usertype] rsp_len drivers/scsi/qla2xxx/qla_nvme.c:508:30: got unsigned int [usertype] rsplen Correct the endianness in qla2xxx driver thus avoiding changes in nvme-fc-driver.h. Fixes: 875386b98857 ("scsi: qla2xxx: Add Unsolicited LS Request and Response Support for NVMe") Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230831112146.32595-1-njavali@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_nvme.c | 10 +++++----- include/linux/nvme-fc-driver.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index db753d712991..a8ddf356e662 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -399,14 +399,14 @@ static int qla_nvme_xmt_ls_rsp(struct nvme_fc_local_port *lport, nvme->u.nvme.dl = 0; nvme->u.nvme.timeout_sec = 0; nvme->u.nvme.cmd_dma = fd_resp->rspdma; - nvme->u.nvme.cmd_len = fd_resp->rsplen; + nvme->u.nvme.cmd_len = cpu_to_le32(fd_resp->rsplen); nvme->u.nvme.rsp_len = 0; nvme->u.nvme.rsp_dma = 0; nvme->u.nvme.exchange_address = uctx->exchange_address; nvme->u.nvme.nport_handle = uctx->nport_handle; nvme->u.nvme.ox_id = uctx->ox_id; dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, - le32_to_cpu(fd_resp->rsplen), DMA_TO_DEVICE); + fd_resp->rsplen, DMA_TO_DEVICE); ql_dbg(ql_dbg_unsol, vha, 0x2122, "Unsol lsreq portid=%06x %8phC exchange_address 0x%x ox_id 0x%x hdl 0x%x\n", @@ -504,13 +504,13 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, nvme->u.nvme.desc = fd; nvme->u.nvme.dir = 0; nvme->u.nvme.dl = 0; - nvme->u.nvme.cmd_len = fd->rqstlen; - nvme->u.nvme.rsp_len = fd->rsplen; + nvme->u.nvme.cmd_len = 
cpu_to_le32(fd->rqstlen); + nvme->u.nvme.rsp_len = cpu_to_le32(fd->rsplen); nvme->u.nvme.rsp_dma = fd->rspdma; nvme->u.nvme.timeout_sec = fd->timeout; nvme->u.nvme.cmd_dma = fd->rqstdma; dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma, - le32_to_cpu(fd->rqstlen), DMA_TO_DEVICE); + fd->rqstlen, DMA_TO_DEVICE); rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index f6ef8cf5d774..4109f1bd6128 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -53,10 +53,10 @@ struct nvmefc_ls_req { void *rqstaddr; dma_addr_t rqstdma; - __le32 rqstlen; + u32 rqstlen; void *rspaddr; dma_addr_t rspdma; - __le32 rsplen; + u32 rsplen; u32 timeout; void *private; @@ -120,7 +120,7 @@ struct nvmefc_ls_req { struct nvmefc_ls_rsp { void *rspbuf; dma_addr_t rspdma; - __le32 rsplen; + u16 rsplen; void (*done)(struct nvmefc_ls_rsp *rsp); void *nvme_fc_private; /* LLDD is not to access !! */ -- cgit v1.2.3 From cf60ce92358da29f3b1e24005f7b748584b82753 Mon Sep 17 00:00:00 2001 From: Pavel Pisa Date: Mon, 4 Sep 2023 12:00:02 +0200 Subject: of: overlay: Fix of_overlay_fdt_apply prototype when !CONFIG_OF_OVERLAY The of_overlay_fdt_apply has been changed but when CONFIG_OF_OVERLAY support is not configured then old stub prototype is declared by of.h header. 
Signed-off-by: Pavel Pisa Fixes: 47284862bfc7 ("of: overlay: Extend of_overlay_fdt_apply() to specify the target node") Acked-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20230904100002.7913-1-pisa@cmp.felk.cvut.cz Signed-off-by: Rob Herring --- include/linux/of.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index ed679819c279..6a9ddf20e79a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1676,8 +1676,8 @@ int of_overlay_notifier_unregister(struct notifier_block *nb); #else -static inline int of_overlay_fdt_apply(void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id) +static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, + int *ovcs_id, struct device_node *target_base) { return -ENOTSUPP; } -- cgit v1.2.3 From 9ffa7b92bc768609243d79fb0c0125b9704c7061 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Aug 2023 18:11:38 +0200 Subject: thermal: core: Clean up headers of thermal zone registration functions For consistency, add a missing thermal_zone_device_register_with_trips() stub for the CONFIG_THERMAL unset case, specify argument names in all of the thermal zone registration and unregistration function headers and make all of them use white space consistently. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki --- include/linux/thermal.h | 53 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index eb17495c8acc..a30643104f3f 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -300,16 +300,24 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif #ifdef CONFIG_THERMAL -struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, - void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); - -void thermal_zone_device_unregister(struct thermal_zone_device *); - -struct thermal_zone_device * -thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int, - void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); +struct thermal_zone_device *thermal_zone_device_register( + const char *type, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay); + +struct thermal_zone_device *thermal_zone_device_register_with_trips( + const char *type, + struct thermal_trip *trips, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay); + +void thermal_zone_device_unregister(struct thermal_zone_device *tz); void *thermal_zone_device_priv(struct thermal_zone_device *tzd); const char *thermal_zone_device_type(struct thermal_zone_device *tzd); @@ -351,14 +359,27 @@ int thermal_zone_device_disable(struct thermal_zone_device *tz); void thermal_zone_device_critical(struct thermal_zone_device *tz); #else static inline struct thermal_zone_device *thermal_zone_device_register( - const char *type, int trips, int mask, void *devdata, - struct thermal_zone_device_ops *ops, - const 
struct thermal_zone_params *tzp, - int passive_delay, int polling_delay) + const char *type, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay) +{ return ERR_PTR(-ENODEV); } + +static inline struct thermal_zone_device *thermal_zone_device_register_with_trips( + const char *type, + struct thermal_trip *trips, + int num_trips, int mask, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + int passive_delay, int polling_delay) { return ERR_PTR(-ENODEV); } -static inline void thermal_zone_device_unregister( - struct thermal_zone_device *tz) + +static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz) { } + static inline struct thermal_cooling_device * thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) -- cgit v1.2.3 From d332db8fc1a2dfb4738281b1d6d4ed20115dd9d3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Aug 2023 18:13:35 +0200 Subject: thermal: core: Add function for registering tripless thermal zones Multiple callers of thermal_zone_device_register() don't pass any trips to it and they might use a shortened argument list for that, so add a special function with fewer arguments for this purpose. Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/thermal_core.c | 11 +++++++++++ include/linux/thermal.h | 13 +++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 0bdde1ab5d8b..33ebec043800 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1400,6 +1400,17 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, int n } EXPORT_SYMBOL_GPL(thermal_zone_device_register); +struct thermal_zone_device *thermal_tripless_zone_device_register( + const char *type, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp) +{ + return thermal_zone_device_register_with_trips(type, NULL, 0, 0, devdata, + ops, tzp, 0, 0); +} +EXPORT_SYMBOL_GPL(thermal_tripless_zone_device_register); + void *thermal_zone_device_priv(struct thermal_zone_device *tzd) { return tzd->devdata; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a30643104f3f..1a1433eb9b59 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -317,6 +317,12 @@ struct thermal_zone_device *thermal_zone_device_register_with_trips( const struct thermal_zone_params *tzp, int passive_delay, int polling_delay); +struct thermal_zone_device *thermal_tripless_zone_device_register( + const char *type, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp); + void thermal_zone_device_unregister(struct thermal_zone_device *tz); void *thermal_zone_device_priv(struct thermal_zone_device *tzd); @@ -377,6 +383,13 @@ static inline struct thermal_zone_device *thermal_zone_device_register_with_trip int passive_delay, int polling_delay) { return ERR_PTR(-ENODEV); } +static inline struct thermal_zone_device *thermal_tripless_zone_device_register( + const char *type, + void *devdata, + struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp) +{ return ERR_PTR(-ENODEV); } + 
static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz) { } -- cgit v1.2.3 From edd220b33f479cf9dcda0bfefb2cb8c5902e9885 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Aug 2023 18:16:29 +0200 Subject: thermal: core: Drop thermal_zone_device_register() There are no more users of thermal_zone_device_register(), so drop it from the core. Note that thermal_zone_device_register_with_trips() may be renamed to thermal_zone_device_register() in the future, but only after a grace period allowing all of the possible work in progress that may be using the latter to adjust. Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_core.c | 11 ----------- include/linux/thermal.h | 17 ----------------- 2 files changed, 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 33ebec043800..8717a3343512 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1389,17 +1389,6 @@ free_tz: } EXPORT_SYMBOL_GPL(thermal_zone_device_register_with_trips); -struct thermal_zone_device *thermal_zone_device_register(const char *type, int ntrips, int mask, - void *devdata, struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, int passive_delay, - int polling_delay) -{ - return thermal_zone_device_register_with_trips(type, NULL, ntrips, mask, - devdata, ops, tzp, - passive_delay, polling_delay); -} -EXPORT_SYMBOL_GPL(thermal_zone_device_register); - struct thermal_zone_device *thermal_tripless_zone_device_register( const char *type, void *devdata, diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 1a1433eb9b59..c99440aac1a1 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -300,14 +300,6 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp); #endif #ifdef CONFIG_THERMAL -struct thermal_zone_device *thermal_zone_device_register( - const char *type, - int num_trips, 
int mask, - void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay); - struct thermal_zone_device *thermal_zone_device_register_with_trips( const char *type, struct thermal_trip *trips, @@ -364,15 +356,6 @@ int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); void thermal_zone_device_critical(struct thermal_zone_device *tz); #else -static inline struct thermal_zone_device *thermal_zone_device_register( - const char *type, - int num_trips, int mask, - void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay) -{ return ERR_PTR(-ENODEV); } - static inline struct thermal_zone_device *thermal_zone_device_register_with_trips( const char *type, struct thermal_trip *trips, -- cgit v1.2.3 From e7716c74e3882405f9eca16faa6cb1bf19995399 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 21 Aug 2023 10:21:29 +0200 Subject: xarray: Document necessary flag in alloc functions Adds a new line to the docstrings of functions wrapping __xa_alloc() and __xa_alloc_cyclic(), informing about the necessity of flag XA_FLAGS_ALLOC being set previously. The documentation so far says that functions wrapping __xa_alloc() and __xa_alloc_cyclic() are supposed to return either -ENOMEM or -EBUSY in case of an error. If the xarray has been initialized without the flag XA_FLAGS_ALLOC, however, they fail with a different, undocumented error code. As hinted at in Documentation/core-api/xarray.rst, wrappers around these functions should only be invoked when the flag has been set. The functions' documentation should reflect that as well. 
Signed-off-by: Philipp Stanner Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 18 ++++++++++++++++++ lib/xarray.c | 6 ++++++ 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 741703b45f61..cb571dfcf4b1 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -856,6 +856,9 @@ static inline int __must_check xa_insert_irq(struct xarray *xa, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -886,6 +889,9 @@ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -916,6 +922,9 @@ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id, * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. 
* Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -949,6 +958,9 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the @@ -983,6 +995,9 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the @@ -1017,6 +1032,9 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the diff --git a/lib/xarray.c b/lib/xarray.c index 142e36f9dfda..39f07bfc4dcc 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1802,6 +1802,9 @@ EXPORT_SYMBOL(xa_get_order); * stores the index into the @id pointer, then stores the entry at * that index. A concurrent lookup will not see an uninitialised @id. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Expects xa_lock to be held on entry. 
May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 on success, -ENOMEM if memory could not be allocated or @@ -1850,6 +1853,9 @@ EXPORT_SYMBOL(__xa_alloc); * The search for an empty entry will start at @next and will wrap * around if necessary. * + * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set + * in xa_init_flags(). + * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. * Return: 0 if the allocation succeeded without wrapping. 1 if the -- cgit v1.2.3 From 39285e124edbc752331e98ace37cc141a6a3747a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 5 Sep 2023 08:46:10 +0000 Subject: net: team: do not use dynamic lockdep key team interface has used a dynamic lockdep key to avoid false-positive lockdep deadlock detection. Virtual interfaces such as team usually have their own lock for protecting private data. These interfaces can be nested. team0 | team1 Each interface's lock is actually different(team0->lock and team1->lock). So, mutex_lock(&team0->lock); mutex_lock(&team1->lock); mutex_unlock(&team1->lock); mutex_unlock(&team0->lock); The above case is absolutely safe. But lockdep warns about deadlock. Because the lockdep understands these two locks are same. This is a false-positive lockdep warning. So, in order to avoid this problem, the team interfaces started to use dynamic lockdep key. The false-positive problem was fixed, but it introduced a new problem. When the new team virtual interface is created, it registers a dynamic lockdep key(creates dynamic lockdep key) and uses it. But there is the limitation of the number of lockdep keys. So, If so many team interfaces are created, it consumes all lockdep keys. Then, the lockdep stops to work and warns about it. In order to fix this problem, team interfaces use the subclass instead of the dynamic key. 
So, when a new team interface is created, it doesn't register (create) a new lockdep key, but uses an existing subclass key instead. It is already used by the bonding interface for a similar case. As the bonding interface does, the subclass variable is the same as the 'dev->nested_level'. This variable indicates the depth in the stacked interface graph. The 'dev->nested_level' is protected by RTNL and RCU. So, 'mutex_lock_nested()' for 'team->lock' requires RTNL or RCU. In the current code, 'team->lock' is usually acquired under RTNL, so there is no problem with using 'dev->nested_level'. The 'team_nl_team_get()' and 'lb_stats_refresh()' functions acquire 'team->lock' without RTNL. But these don't iterate over their own ports in a nested fashion, so they don't need a nested lock. Reproducer: for i in {0..1000} do ip link add team$i type team ip link add dummy$i master team$i type dummy ip link set dummy$i up ip link set team$i up done Splat looks like: BUG: MAX_LOCKDEP_ENTRIES too low! turning off the locking correctness validator. Please attach the output of /proc/lock_stat to the bug report CPU: 0 PID: 4104 Comm: ip Not tainted 6.5.0-rc7+ #45 Call Trace: dump_stack_lvl+0x64/0xb0 add_lock_to_list+0x30d/0x5e0 check_prev_add+0x73a/0x23a0 ... sock_def_readable+0xfe/0x4f0 netlink_broadcast+0x76b/0xac0 nlmsg_notify+0x69/0x1d0 dev_open+0xed/0x130 ... Reported-by: syzbot+9bbbacfbf1e04d5221f7@syzkaller.appspotmail.com Fixes: 369f61bee0f5 ("team: fix nested locking lockdep warning") Signed-off-by: Taehee Yoo Signed-off-by: David S.
Miller --- drivers/net/team/team.c | 111 +++++++++++++++---------------- drivers/net/team/team_mode_loadbalance.c | 4 +- include/linux/if_team.h | 30 ++++++++- 3 files changed, 85 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index e8b94580194e..ad29122a5468 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1135,8 +1135,8 @@ static int team_port_add(struct team *team, struct net_device *port_dev, struct netlink_ext_ack *extack) { struct net_device *dev = team->dev; - struct team_port *port; char *portname = port_dev->name; + struct team_port *port; int err; if (port_dev->flags & IFF_LOOPBACK) { @@ -1203,18 +1203,31 @@ static int team_port_add(struct team *team, struct net_device *port_dev, memcpy(port->orig.dev_addr, port_dev->dev_addr, port_dev->addr_len); - err = team_port_enter(team, port); + err = dev_open(port_dev, extack); if (err) { - netdev_err(dev, "Device %s failed to enter team mode\n", + netdev_dbg(dev, "Device %s opening failed\n", portname); - goto err_port_enter; + goto err_dev_open; } - err = dev_open(port_dev, extack); + err = team_upper_dev_link(team, port, extack); if (err) { - netdev_dbg(dev, "Device %s opening failed\n", + netdev_err(dev, "Device %s failed to set upper link\n", portname); - goto err_dev_open; + goto err_set_upper_link; + } + + /* lockdep subclass variable(dev->nested_level) was updated by + * team_upper_dev_link(). 
+ */ + team_unlock(team); + team_lock(team); + + err = team_port_enter(team, port); + if (err) { + netdev_err(dev, "Device %s failed to enter team mode\n", + portname); + goto err_port_enter; } err = vlan_vids_add_by_dev(port_dev, dev); @@ -1242,13 +1255,6 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_handler_register; } - err = team_upper_dev_link(team, port, extack); - if (err) { - netdev_err(dev, "Device %s failed to set upper link\n", - portname); - goto err_set_upper_link; - } - err = __team_option_inst_add_port(team, port); if (err) { netdev_err(dev, "Device %s failed to add per-port options\n", @@ -1295,9 +1301,6 @@ err_set_slave_promisc: __team_option_inst_del_port(team, port); err_option_port_add: - team_upper_dev_unlink(team, port); - -err_set_upper_link: netdev_rx_handler_unregister(port_dev); err_handler_register: @@ -1307,13 +1310,16 @@ err_enable_netpoll: vlan_vids_del_by_dev(port_dev, dev); err_vids_add: + team_port_leave(team, port); + +err_port_enter: + team_upper_dev_unlink(team, port); + +err_set_upper_link: dev_close(port_dev); err_dev_open: - team_port_leave(team, port); team_port_set_orig_dev_addr(port); - -err_port_enter: dev_set_mtu(port_dev, port->orig.mtu); err_set_mtu: @@ -1616,6 +1622,7 @@ static int team_init(struct net_device *dev) int err; team->dev = dev; + mutex_init(&team->lock); team_set_no_mode(team); team->notifier_ctx = false; @@ -1643,8 +1650,6 @@ static int team_init(struct net_device *dev) goto err_options_register; netif_carrier_off(dev); - lockdep_register_key(&team->team_lock_key); - __mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key); netdev_lockdep_set_classes(dev); return 0; @@ -1665,7 +1670,7 @@ static void team_uninit(struct net_device *dev) struct team_port *port; struct team_port *tmp; - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry_safe(port, tmp, &team->port_list, list) team_port_del(team, port->dev); @@ -1674,9 +1679,8 @@ static void 
team_uninit(struct net_device *dev) team_mcast_rejoin_fini(team); team_notify_peers_fini(team); team_queue_override_fini(team); - mutex_unlock(&team->lock); + team_unlock(team); netdev_change_features(dev); - lockdep_unregister_key(&team->team_lock_key); } static void team_destructor(struct net_device *dev) @@ -1790,18 +1794,18 @@ static void team_set_rx_mode(struct net_device *dev) static int team_set_mac_address(struct net_device *dev, void *p) { - struct sockaddr *addr = p; struct team *team = netdev_priv(dev); + struct sockaddr *addr = p; struct team_port *port; if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; dev_addr_set(dev, addr->sa_data); - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry(port, &team->port_list, list) if (team->ops.port_change_dev_addr) team->ops.port_change_dev_addr(team, port); - mutex_unlock(&team->lock); + team_unlock(team); return 0; } @@ -1815,7 +1819,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) * Alhough this is reader, it's guarded by team lock. It's not possible * to traverse list in reverse under rcu_read_lock */ - mutex_lock(&team->lock); + team_lock(team); team->port_mtu_change_allowed = true; list_for_each_entry(port, &team->port_list, list) { err = dev_set_mtu(port->dev, new_mtu); @@ -1826,7 +1830,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) } } team->port_mtu_change_allowed = false; - mutex_unlock(&team->lock); + team_unlock(team); dev->mtu = new_mtu; @@ -1836,7 +1840,7 @@ unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) dev_set_mtu(port->dev, dev->mtu); team->port_mtu_change_allowed = false; - mutex_unlock(&team->lock); + team_unlock(team); return err; } @@ -1890,20 +1894,20 @@ static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) * Alhough this is reader, it's guarded by team lock. 
It's not possible * to traverse list in reverse under rcu_read_lock */ - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry(port, &team->port_list, list) { err = vlan_vid_add(port->dev, proto, vid); if (err) goto unwind; } - mutex_unlock(&team->lock); + team_unlock(team); return 0; unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - mutex_unlock(&team->lock); + team_unlock(team); return err; } @@ -1913,10 +1917,10 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) struct team *team = netdev_priv(dev); struct team_port *port; - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - mutex_unlock(&team->lock); + team_unlock(team); return 0; } @@ -1938,9 +1942,9 @@ static void team_netpoll_cleanup(struct net_device *dev) { struct team *team = netdev_priv(dev); - mutex_lock(&team->lock); + team_lock(team); __team_netpoll_cleanup(team); - mutex_unlock(&team->lock); + team_unlock(team); } static int team_netpoll_setup(struct net_device *dev, @@ -1950,7 +1954,7 @@ static int team_netpoll_setup(struct net_device *dev, struct team_port *port; int err = 0; - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry(port, &team->port_list, list) { err = __team_port_enable_netpoll(port); if (err) { @@ -1958,7 +1962,7 @@ static int team_netpoll_setup(struct net_device *dev, break; } } - mutex_unlock(&team->lock); + team_unlock(team); return err; } #endif @@ -1969,9 +1973,9 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev, struct team *team = netdev_priv(dev); int err; - mutex_lock(&team->lock); + team_lock(team); err = team_port_add(team, port_dev, extack); - mutex_unlock(&team->lock); + team_unlock(team); if (!err) netdev_change_features(dev); @@ -1984,19 +1988,12 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev) struct team 
*team = netdev_priv(dev); int err; - mutex_lock(&team->lock); + team_lock(team); err = team_port_del(team, port_dev); - mutex_unlock(&team->lock); - - if (err) - return err; + team_unlock(team); - if (netif_is_team_master(port_dev)) { - lockdep_unregister_key(&team->team_lock_key); - lockdep_register_key(&team->team_lock_key); - lockdep_set_class(&team->lock, &team->team_lock_key); - } - netdev_change_features(dev); + if (!err) + netdev_change_features(dev); return err; } @@ -2316,13 +2313,13 @@ static struct team *team_nl_team_get(struct genl_info *info) } team = netdev_priv(dev); - mutex_lock(&team->lock); + __team_lock(team); return team; } static void team_nl_team_put(struct team *team) { - mutex_unlock(&team->lock); + team_unlock(team); dev_put(team->dev); } @@ -2984,9 +2981,9 @@ static void team_port_change_check(struct team_port *port, bool linkup) { struct team *team = port->team; - mutex_lock(&team->lock); + team_lock(team); __team_port_change_check(port, linkup); - mutex_unlock(&team->lock); + team_unlock(team); } diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 00f8989c29c0..7bcc9d37447a 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -478,7 +478,7 @@ static void lb_stats_refresh(struct work_struct *work) team = lb_priv_ex->team; lb_priv = get_lb_priv(team); - if (!mutex_trylock(&team->lock)) { + if (!team_trylock(team)) { schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0); return; } @@ -515,7 +515,7 @@ static void lb_stats_refresh(struct work_struct *work) schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, (lb_priv_ex->stats.refresh_interval * HZ) / 10); - mutex_unlock(&team->lock); + team_unlock(team); } static void lb_stats_refresh_interval_get(struct team *team, diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 1b9b15a492fa..12d4447fc8ab 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -221,10 
+221,38 @@ struct team { atomic_t count_pending; struct delayed_work dw; } mcast_rejoin; - struct lock_class_key team_lock_key; long mode_priv[TEAM_MODE_PRIV_LONGS]; }; +static inline void __team_lock(struct team *team) +{ + mutex_lock(&team->lock); +} + +static inline int team_trylock(struct team *team) +{ + return mutex_trylock(&team->lock); +} + +#ifdef CONFIG_LOCKDEP +static inline void team_lock(struct team *team) +{ + ASSERT_RTNL(); + mutex_lock_nested(&team->lock, team->dev->nested_level); +} + +#else +static inline void team_lock(struct team *team) +{ + __team_lock(team); +} +#endif + +static inline void team_unlock(struct team *team) +{ + mutex_unlock(&team->lock); +} + static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, struct sk_buff *skb) { -- cgit v1.2.3 From 1a961e74d5abbea049588a3d74b759955b4ed9d5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Sep 2023 16:42:02 -0700 Subject: net: phylink: fix sphinx complaint about invalid literal sphinx complains about the use of "%PHYLINK_PCS_NEG_*": Documentation/networking/kapi:144: ./include/linux/phylink.h:601: WARNING: Inline literal start-string without end-string. Documentation/networking/kapi:144: ./include/linux/phylink.h:633: WARNING: Inline literal start-string without end-string. These are not valid symbols so drop the '%' prefix. Alternatively we could use %PHYLINK_PCS_NEG_\* (escape the *) or use normal literal ``PHYLINK_PCS_NEG_*`` but there is already a handful of un-adorned DEFINE_* in this file. Fixes: f99d471afa03 ("net: phylink: add PCS negotiation mode") Reported-by: Stephen Rothwell Link: https://lore.kernel.org/all/20230626162908.2f149f98@canb.auug.org.au/ Signed-off-by: Jakub Kicinski Reviewed-by: Bagas Sanjaya Signed-off-by: David S. 
Miller --- include/linux/phylink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 7d07f8736431..2b886ea654bb 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -600,7 +600,7 @@ void pcs_get_state(struct phylink_pcs *pcs, * * The %neg_mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested - * against the %PHYLINK_PCS_NEG_* definitions. + * against the PHYLINK_PCS_NEG_* definitions. */ int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, @@ -630,7 +630,7 @@ void pcs_an_restart(struct phylink_pcs *pcs); * * The %mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested - * against the %PHYLINK_PCS_NEG_* definitions. + * against the PHYLINK_PCS_NEG_* definitions. */ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); -- cgit v1.2.3 From 8f3f06dfd6873135068ccf1a0b386308e8c4da38 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 6 Sep 2023 22:53:55 +0800 Subject: raid6: Add LoongArch SIMD syndrome calculation The algorithms work on 64 bytes at a time, which is the L1 cache line size of all current and future LoongArch cores (that we care about), as confirmed by Huacai. The code is based on the generic int.uc algorithm, unrolled 4 times for LSX and 2 times for LASX. Further unrolling does not meaningfully improve the performance according to experiments. 
Performance numbers measured during system boot on a 3A5000 @ 2.5GHz: > raid6: lasx gen() 12726 MB/s > raid6: lsx gen() 10001 MB/s > raid6: int64x8 gen() 2876 MB/s > raid6: int64x4 gen() 3867 MB/s > raid6: int64x2 gen() 2531 MB/s > raid6: int64x1 gen() 1945 MB/s Comparison of xor() speeds (from different boots but meaningful anyway): > lasx: 11226 MB/s > lsx: 6395 MB/s > int64x4: 2147 MB/s Performance as measured by raid6test: > raid6: lasx gen() 25109 MB/s > raid6: lsx gen() 13233 MB/s > raid6: int64x8 gen() 4164 MB/s > raid6: int64x4 gen() 6005 MB/s > raid6: int64x2 gen() 5781 MB/s > raid6: int64x1 gen() 4119 MB/s > raid6: using algorithm lasx gen() 25109 MB/s > raid6: .... xor() 14439 MB/s, rmw enabled Acked-by: Song Liu Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- include/linux/raid/pq.h | 2 + lib/raid6/Makefile | 1 + lib/raid6/algos.c | 8 + lib/raid6/loongarch.h | 38 ++++ lib/raid6/loongarch_simd.c | 422 +++++++++++++++++++++++++++++++++++++++++++++ lib/raid6/test/Makefile | 12 ++ 6 files changed, 483 insertions(+) create mode 100644 lib/raid6/loongarch.h create mode 100644 lib/raid6/loongarch_simd.c (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index f29aaaf2eb21..874447485848 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -108,6 +108,8 @@ extern const struct raid6_calls raid6_vpermxor1; extern const struct raid6_calls raid6_vpermxor2; extern const struct raid6_calls raid6_vpermxor4; extern const struct raid6_calls raid6_vpermxor8; +extern const struct raid6_calls raid6_lsx; +extern const struct raid6_calls raid6_lasx; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 45e17619422b..2b9ebe105480 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o 
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o +raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o hostprogs += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index a22a05c9af8a..739c7ebcae1a 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_neonx2, &raid6_neonx1, #endif +#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_CPU_HAS_LASX + &raid6_lasx, +#endif +#ifdef CONFIG_CPU_HAS_LSX + &raid6_lsx, +#endif +#endif #if defined(__ia64__) &raid6_intx32, &raid6_intx16, diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h new file mode 100644 index 000000000000..acfc33ce7056 --- /dev/null +++ b/lib/raid6/loongarch.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 WANG Xuerui + * + * raid6/loongarch.h + * + * Definitions common to LoongArch RAID-6 code only + */ + +#ifndef _LIB_RAID6_LOONGARCH_H +#define _LIB_RAID6_LOONGARCH_H + +#ifdef __KERNEL__ + +#include +#include + +#else /* for user-space testing */ + +#include + +/* have to supply these defines for glibc 2.37- and musl */ +#ifndef HWCAP_LOONGARCH_LSX +#define HWCAP_LOONGARCH_LSX (1 << 4) +#endif +#ifndef HWCAP_LOONGARCH_LASX +#define HWCAP_LOONGARCH_LASX (1 << 5) +#endif + +#define kernel_fpu_begin() +#define kernel_fpu_end() + +#define cpu_has_lsx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX) +#define cpu_has_lasx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX) + +#endif /* __KERNEL__ */ + +#endif /* _LIB_RAID6_LOONGARCH_H */ diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c new file mode 100644 index 000000000000..aa5d9f924ca3 --- /dev/null +++ b/lib/raid6/loongarch_simd.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX) + * + * Copyright 2023 WANG Xuerui + * + * Based on the 
generic RAID-6 code (int.uc): + * + * Copyright 2002-2004 H. Peter Anvin + */ + +#include +#include "loongarch.h" + +/* + * The vector algorithms are currently priority 0, which means the generic + * scalar algorithms are not being disabled if vector support is present. + * This is like the similar LoongArch RAID5 XOR code, with the main reason + * repeated here: it cannot be ruled out at this point of time, that some + * future (maybe reduced) models could run the vector algorithms slower than + * the scalar ones, maybe for errata or micro-op reasons. It may be + * appropriate to revisit this after one or two more uarch generations. + */ + +#ifdef CONFIG_CPU_HAS_LSX +#define NSIZE 16 + +static int raid6_has_lsx(void) +{ + return cpu_has_lsx; +} + +static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $vr0, $vr1, $vr2, $vr3: wp + * $vr4, $vr5, $vr6, $vr7: wq + * $vr8, $vr9, $vr10, $vr11: wd + * $vr12, $vr13, $vr14, $vr15: w2 + * $vr16, $vr17, $vr18, $vr19: w1 + */ + for (d = 0; d < bytes; d += NSIZE*4) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); + asm volatile("vori.b $vr4, $vr0, 0"); + asm volatile("vori.b $vr5, $vr1, 0"); + asm volatile("vori.b $vr6, $vr2, 0"); + asm volatile("vori.b $vr7, $vr3, 0"); + for (z = z0-1; z >= 0; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); + asm volatile("vld 
$vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("vxor.v $vr16, $vr16, $vr12"); + asm volatile("vxor.v $vr17, $vr17, $vr13"); + asm volatile("vxor.v $vr18, $vr18, $vr14"); + asm volatile("vxor.v $vr19, $vr19, $vr15"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr8"); + asm volatile("vxor.v $vr5, $vr17, $vr9"); + asm volatile("vxor.v $vr6, $vr18, $vr10"); + asm volatile("vxor.v $vr7, $vr19, $vr11"); + } + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ + asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0])); + asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1])); + asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2])); + asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3])); + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ + asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0])); + asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1])); + asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2])); + asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3])); + } + + kernel_fpu_end(); +} + +static void raid6_lsx_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side 
optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $vr0, $vr1, $vr2, $vr3: wp + * $vr4, $vr5, $vr6, $vr7: wq + * $vr8, $vr9, $vr10, $vr11: wd + * $vr12, $vr13, $vr14, $vr15: w2 + * $vr16, $vr17, $vr18, $vr19: w1 + */ + for (d = 0; d < bytes; d += NSIZE*4) { + /* P/Q data pages */ + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); + asm volatile("vori.b $vr4, $vr0, 0"); + asm volatile("vori.b $vr5, $vr1, 0"); + asm volatile("vori.b $vr6, $vr2, 0"); + asm volatile("vori.b $vr7, $vr3, 0"); + for (z = z0-1; z >= start; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); + asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("vxor.v 
$vr16, $vr16, $vr12"); + asm volatile("vxor.v $vr17, $vr17, $vr13"); + asm volatile("vxor.v $vr18, $vr18, $vr14"); + asm volatile("vxor.v $vr19, $vr19, $vr15"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr8"); + asm volatile("vxor.v $vr5, $vr17, $vr9"); + asm volatile("vxor.v $vr6, $vr18, $vr10"); + asm volatile("vxor.v $vr7, $vr19, $vr11"); + } + + /* P/Q left side optimization */ + for (z = start-1; z >= 0; z--) { + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* wq$$ = w1$$ ^ w2$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr12"); + asm volatile("vxor.v $vr5, $vr17, $vr13"); + asm volatile("vxor.v $vr6, $vr18, $vr14"); + asm volatile("vxor.v $vr7, $vr19, $vr15"); + } + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + */ + asm volatile( + "vld $vr20, %0\n\t" + "vld $vr21, %1\n\t" + "vld $vr22, %2\n\t" + "vld $vr23, %3\n\t" + "vld $vr24, %4\n\t" + "vld $vr25, %5\n\t" + "vld $vr26, %6\n\t" + "vld $vr27, %7\n\t" + "vxor.v $vr20, $vr20, $vr0\n\t" + "vxor.v $vr21, $vr21, $vr1\n\t" + "vxor.v $vr22, $vr22, $vr2\n\t" + "vxor.v $vr23, $vr23, $vr3\n\t" + "vxor.v $vr24, $vr24, $vr4\n\t" + "vxor.v $vr25, $vr25, $vr5\n\t" + "vxor.v $vr26, $vr26, $vr6\n\t" + "vxor.v $vr27, $vr27, $vr7\n\t" + "vst $vr20, %0\n\t" + "vst $vr21, %1\n\t" + "vst $vr22, %2\n\t" + "vst $vr23, %3\n\t" + "vst $vr24, %4\n\t" + "vst $vr25, %5\n\t" + "vst $vr26, %6\n\t" + "vst $vr27, %7\n\t" + : 
"+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), + "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]), + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]), + "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3]) + ); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_lsx = { + raid6_lsx_gen_syndrome, + raid6_lsx_xor_syndrome, + raid6_has_lsx, + "lsx", + .priority = 0 /* see the comment near the top of the file for reason */ +}; + +#undef NSIZE +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +#define NSIZE 32 + +static int raid6_has_lasx(void) +{ + return cpu_has_lasx; +} + +static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $xr0, $xr1: wp + * $xr2, $xr3: wq + * $xr4, $xr5: wd + * $xr6, $xr7: w2 + * $xr8, $xr9: w1 + */ + for (d = 0; d < bytes; d += NSIZE*2) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("xvori.b $xr2, $xr0, 0"); + asm volatile("xvori.b $xr3, $xr1, 0"); + for (z = z0-1; z >= 0; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("xvxor.v $xr8, $xr8, $xr6"); + asm volatile("xvxor.v 
$xr9, $xr9, $xr7"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr4"); + asm volatile("xvxor.v $xr3, $xr9, $xr5"); + } + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ + asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0])); + asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1])); + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ + asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0])); + asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1])); + } + + kernel_fpu_end(); +} + +static void raid6_lasx_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $xr0, $xr1: wp + * $xr2, $xr3: wq + * $xr4, $xr5: wd + * $xr6, $xr7: w2 + * $xr8, $xr9: w1 + */ + for (d = 0; d < bytes; d += NSIZE*2) { + /* P/Q data pages */ + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("xvori.b $xr2, $xr0, 0"); + asm volatile("xvori.b $xr3, $xr1, 0"); + for (z = z0-1; z >= start; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("xvxor.v $xr8, $xr8, $xr6"); + asm volatile("xvxor.v $xr9, $xr9, $xr7"); + /* 
wq$$ = w1$$ ^ wd$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr4"); + asm volatile("xvxor.v $xr3, $xr9, $xr5"); + } + + /* P/Q left side optimization */ + for (z = start-1; z >= 0; z--) { + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* wq$$ = w1$$ ^ w2$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr6"); + asm volatile("xvxor.v $xr3, $xr9, $xr7"); + } + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + */ + asm volatile( + "xvld $xr10, %0\n\t" + "xvld $xr11, %1\n\t" + "xvld $xr12, %2\n\t" + "xvld $xr13, %3\n\t" + "xvxor.v $xr10, $xr10, $xr0\n\t" + "xvxor.v $xr11, $xr11, $xr1\n\t" + "xvxor.v $xr12, $xr12, $xr2\n\t" + "xvxor.v $xr13, $xr13, $xr3\n\t" + "xvst $xr10, %0\n\t" + "xvst $xr11, %1\n\t" + "xvst $xr12, %2\n\t" + "xvst $xr13, %3\n\t" + : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]) + ); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_lasx = { + raid6_lasx_gen_syndrome, + raid6_lasx_xor_syndrome, + raid6_has_lasx, + "lasx", + .priority = 0 /* see the comment near the top of the file for reason */ +}; +#undef NSIZE +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 1f693ea3b980..7b244bce32b3 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc) gcc -c -x c - >/dev/null && rm ./-.o && echo yes) endif +ifeq ($(ARCH),loongarch64) + CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1 + CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' | \ + gcc -c -x assembler - >/dev/null 2>&1 && \ + rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1) + CFLAGS += $(shell echo 'xvld 
$$xr0, $$zero, 0' | \ + gcc -c -x assembler - >/dev/null 2>&1 && \ + rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1) +endif + ifeq ($(IS_X86),yes) OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o CFLAGS += -DCONFIG_X86 @@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes) CFLAGS += -DCONFIG_ALTIVEC OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o +else ifeq ($(ARCH),loongarch64) + OBJS += loongarch_simd.o endif .c.o: -- cgit v1.2.3 From f2091321044d9fbcadb93dfc1c9cf23e563ea40c Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 6 Sep 2023 22:53:55 +0800 Subject: raid6: Add LoongArch SIMD recovery implementation Similar to the syndrome calculation, the recovery algorithms also work on 64 bytes at a time to align with the L1 cache line size of current and future LoongArch cores (that we care about). Which means unrolled-by-4 LSX and unrolled-by-2 LASX code. The assembly is originally based on the x86 SSSE3/AVX2 ports, but register allocation has been redone to take advantage of LSX/LASX's 32 vector registers, and instruction sequence has been optimized to suit (e.g. LoongArch can perform per-byte srl and andi on vectors, but x86 cannot). Performance numbers measured by instrumenting the raid6test code, on a 3A5000 system clocked at 2.5GHz: > lasx 2data: 354.987 MiB/s > lasx datap: 350.430 MiB/s > lsx 2data: 340.026 MiB/s > lsx datap: 337.318 MiB/s > intx1 2data: 164.280 MiB/s > intx1 datap: 187.966 MiB/s Because recovery algorithms are chosen solely based on priority and availability, lasx is marked as priority 2 and lsx priority 1. At least for the current generation of LoongArch micro-architectures, LASX should always be faster than LSX whenever supported, and have similar power consumption characteristics (because the only known LASX-capable uarch, the LA464, always computes the full 256-bit result for vector ops).
Acked-by: Song Liu Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- include/linux/raid/pq.h | 2 + lib/raid6/Makefile | 2 +- lib/raid6/algos.c | 8 + lib/raid6/recov_loongarch_simd.c | 513 +++++++++++++++++++++++++++++++++++++++ lib/raid6/test/Makefile | 2 +- 5 files changed, 525 insertions(+), 2 deletions(-) create mode 100644 lib/raid6/recov_loongarch_simd.c (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 874447485848..006e18decfad 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -125,6 +125,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2; extern const struct raid6_recov_calls raid6_recov_avx512; extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_recov_calls raid6_recov_neon; +extern const struct raid6_recov_calls raid6_recov_lsx; +extern const struct raid6_recov_calls raid6_recov_lasx; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 2b9ebe105480..035b0a4db476 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -9,7 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o -raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o +raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o hostprogs += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 739c7ebcae1a..0ec534faf019 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -111,6 +111,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { #endif #if defined(CONFIG_KERNEL_MODE_NEON) &raid6_recov_neon, +#endif +#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_CPU_HAS_LASX + &raid6_recov_lasx, +#endif +#ifdef 
CONFIG_CPU_HAS_LSX + &raid6_recov_lsx, +#endif #endif &raid6_recov_intx1, NULL diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c new file mode 100644 index 000000000000..94aeac85e6f7 --- /dev/null +++ b/lib/raid6/recov_loongarch_simd.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX) + * + * Copyright (C) 2023 WANG Xuerui + * + * Originally based on recov_avx2.c and recov_ssse3.c: + * + * Copyright (C) 2012 Intel Corporation + * Author: Jim Kukunas + */ + +#include +#include "loongarch.h" + +/* + * Unlike with the syndrome calculation algorithms, there's no boot-time + * selection of recovery algorithms by benchmarking, so we have to specify + * the priorities and hope the future cores will all have decent vector + * support (i.e. no LASX slower than LSX, or even scalar code). + */ + +#ifdef CONFIG_CPU_HAS_LSX +static int raid6_has_lsx(void) +{ + return cpu_has_lsx; +} + +static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* + * 
vr20, vr21: qmul + * vr22, vr23: pbmul + */ + asm volatile("vld $vr20, %0" : : "m" (qmul[0])); + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); + + while (bytes) { + /* vr4 - vr7: Q */ + asm volatile("vld $vr4, %0" : : "m" (q[0])); + asm volatile("vld $vr5, %0" : : "m" (q[16])); + asm volatile("vld $vr6, %0" : : "m" (q[32])); + asm volatile("vld $vr7, %0" : : "m" (q[48])); + /* vr4 - vr7: Q + Qxy */ + asm volatile("vld $vr8, %0" : : "m" (dq[0])); + asm volatile("vld $vr9, %0" : : "m" (dq[16])); + asm volatile("vld $vr10, %0" : : "m" (dq[32])); + asm volatile("vld $vr11, %0" : : "m" (dq[48])); + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + /* vr0 - vr3: P */ + asm volatile("vld $vr0, %0" : : "m" (p[0])); + asm volatile("vld $vr1, %0" : : "m" (p[16])); + asm volatile("vld $vr2, %0" : : "m" (p[32])); + asm volatile("vld $vr3, %0" : : "m" (p[48])); + /* vr0 - vr3: P + Pxy */ + asm volatile("vld $vr8, %0" : : "m" (dp[0])); + asm volatile("vld $vr9, %0" : : "m" (dp[16])); + asm volatile("vld $vr10, %0" : : "m" (dp[32])); + asm volatile("vld $vr11, %0" : : "m" (dp[48])); + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */ + asm volatile("vsrli.b $vr8, $vr4, 4"); + asm volatile("vsrli.b $vr9, $vr5, 4"); + asm volatile("vsrli.b $vr10, $vr6, 4"); + asm volatile("vsrli.b $vr11, $vr7, 4"); + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */ + asm volatile("vandi.b $vr4, $vr4, 0x0f"); + asm volatile("vandi.b $vr5, $vr5, 0x0f"); + asm volatile("vandi.b $vr6, $vr6, 0x0f"); + asm volatile("vandi.b $vr7, $vr7, 0x0f"); + /* lookup from qmul[0] */ + asm 
volatile("vshuf.b $vr4, $vr20, $vr20, $vr4"); + asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5"); + asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6"); + asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7"); + /* lookup from qmul[16] */ + asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8"); + asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9"); + asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10"); + asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11"); + /* vr16 - vr19: B(Q + Qxy) */ + asm volatile("vxor.v $vr16, $vr8, $vr4"); + asm volatile("vxor.v $vr17, $vr9, $vr5"); + asm volatile("vxor.v $vr18, $vr10, $vr6"); + asm volatile("vxor.v $vr19, $vr11, $vr7"); + + /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */ + asm volatile("vsrli.b $vr4, $vr0, 4"); + asm volatile("vsrli.b $vr5, $vr1, 4"); + asm volatile("vsrli.b $vr6, $vr2, 4"); + asm volatile("vsrli.b $vr7, $vr3, 4"); + /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */ + asm volatile("vandi.b $vr12, $vr0, 0x0f"); + asm volatile("vandi.b $vr13, $vr1, 0x0f"); + asm volatile("vandi.b $vr14, $vr2, 0x0f"); + asm volatile("vandi.b $vr15, $vr3, 0x0f"); + /* lookup from pbmul[0] */ + asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12"); + asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13"); + asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14"); + asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15"); + /* lookup from pbmul[16] */ + asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4"); + asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5"); + asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6"); + asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7"); + /* vr4 - vr7: A(P + Pxy) */ + asm volatile("vxor.v $vr4, $vr4, $vr12"); + asm volatile("vxor.v $vr5, $vr5, $vr13"); + asm volatile("vxor.v $vr6, $vr6, $vr14"); + asm volatile("vxor.v $vr7, $vr7, $vr15"); + + /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */ + asm volatile("vxor.v $vr4, $vr4, $vr16"); + asm volatile("vxor.v $vr5, $vr5, $vr17"); + asm volatile("vxor.v $vr6, $vr6, $vr18"); + asm 
volatile("vxor.v $vr7, $vr7, $vr19"); + asm volatile("vst $vr4, %0" : "=m" (dq[0])); + asm volatile("vst $vr5, %0" : "=m" (dq[16])); + asm volatile("vst $vr6, %0" : "=m" (dq[32])); + asm volatile("vst $vr7, %0" : "=m" (dq[48])); + + /* vr0 - vr3: P + Pxy + Dx = Dy */ + asm volatile("vxor.v $vr0, $vr0, $vr4"); + asm volatile("vxor.v $vr1, $vr1, $vr5"); + asm volatile("vxor.v $vr2, $vr2, $vr6"); + asm volatile("vxor.v $vr3, $vr3, $vr7"); + asm volatile("vst $vr0, %0" : "=m" (dp[0])); + asm volatile("vst $vr1, %0" : "=m" (dp[16])); + asm volatile("vst $vr2, %0" : "=m" (dp[32])); + asm volatile("vst $vr3, %0" : "=m" (dp[48])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + /* vr22, vr23: qmul */ + asm volatile("vld $vr22, %0" : : "m" (qmul[0])); + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); + + while (bytes) { + /* vr0 - vr3: P + Dx */ + asm volatile("vld $vr0, %0" : : "m" (p[0])); + asm volatile("vld $vr1, %0" : : "m" (p[16])); + asm volatile("vld $vr2, %0" : : "m" (p[32])); + asm volatile("vld $vr3, %0" : : "m" (p[48])); + /* vr4 - vr7: Qx */ + asm volatile("vld $vr4, %0" : : "m" (dq[0])); + asm volatile("vld $vr5, %0" : : "m" (dq[16])); + asm volatile("vld $vr6, %0" : : "m" (dq[32])); + asm volatile("vld $vr7, %0" : : "m" (dq[48])); + /* 
vr4 - vr7: Q + Qx */ + asm volatile("vld $vr8, %0" : : "m" (q[0])); + asm volatile("vld $vr9, %0" : : "m" (q[16])); + asm volatile("vld $vr10, %0" : : "m" (q[32])); + asm volatile("vld $vr11, %0" : : "m" (q[48])); + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */ + asm volatile("vsrli.b $vr8, $vr4, 4"); + asm volatile("vsrli.b $vr9, $vr5, 4"); + asm volatile("vsrli.b $vr10, $vr6, 4"); + asm volatile("vsrli.b $vr11, $vr7, 4"); + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */ + asm volatile("vandi.b $vr4, $vr4, 0x0f"); + asm volatile("vandi.b $vr5, $vr5, 0x0f"); + asm volatile("vandi.b $vr6, $vr6, 0x0f"); + asm volatile("vandi.b $vr7, $vr7, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4"); + asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5"); + asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6"); + asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7"); + /* lookup from qmul[16] */ + asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8"); + asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9"); + asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10"); + asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11"); + /* vr4 - vr7: qmul(Q + Qx) = Dx */ + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + asm volatile("vst $vr4, %0" : "=m" (dq[0])); + asm volatile("vst $vr5, %0" : "=m" (dq[16])); + asm volatile("vst $vr6, %0" : "=m" (dq[32])); + asm volatile("vst $vr7, %0" : "=m" (dq[48])); + + /* vr0 - vr3: P + Dx + Dx = P */ + asm volatile("vxor.v $vr0, $vr0, $vr4"); + asm volatile("vxor.v $vr1, $vr1, $vr5"); + asm volatile("vxor.v $vr2, $vr2, $vr6"); + asm volatile("vxor.v $vr3, $vr3, $vr7"); + asm volatile("vst $vr0, %0" : "=m" (p[0])); + asm 
volatile("vst $vr1, %0" : "=m" (p[16])); + asm volatile("vst $vr2, %0" : "=m" (p[32])); + asm volatile("vst $vr3, %0" : "=m" (p[48])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_lsx = { + .data2 = raid6_2data_recov_lsx, + .datap = raid6_datap_recov_lsx, + .valid = raid6_has_lsx, + .name = "lsx", + .priority = 1, +}; +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +static int raid6_has_lasx(void) +{ + return cpu_has_lasx; +} + +static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* + * xr20, xr21: qmul + * xr22, xr23: pbmul + */ + asm volatile("vld $vr20, %0" : : "m" (qmul[0])); + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); + asm volatile("xvreplve0.q $xr20, $xr20"); + asm volatile("xvreplve0.q $xr21, $xr21"); + asm volatile("xvreplve0.q $xr22, $xr22"); + asm volatile("xvreplve0.q $xr23, $xr23"); + + while (bytes) { + /* xr0, xr1: 
Q */ + asm volatile("xvld $xr0, %0" : : "m" (q[0])); + asm volatile("xvld $xr1, %0" : : "m" (q[32])); + /* xr0, xr1: Q + Qxy */ + asm volatile("xvld $xr4, %0" : : "m" (dq[0])); + asm volatile("xvld $xr5, %0" : : "m" (dq[32])); + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* xr2, xr3: P */ + asm volatile("xvld $xr2, %0" : : "m" (p[0])); + asm volatile("xvld $xr3, %0" : : "m" (p[32])); + /* xr2, xr3: P + Pxy */ + asm volatile("xvld $xr4, %0" : : "m" (dp[0])); + asm volatile("xvld $xr5, %0" : : "m" (dp[32])); + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */ + asm volatile("xvsrli.b $xr4, $xr0, 4"); + asm volatile("xvsrli.b $xr5, $xr1, 4"); + /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */ + asm volatile("xvandi.b $xr0, $xr0, 0x0f"); + asm volatile("xvandi.b $xr1, $xr1, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0"); + asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1"); + /* lookup from qmul[16] */ + asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4"); + asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5"); + /* xr6, xr7: B(Q + Qxy) */ + asm volatile("xvxor.v $xr6, $xr4, $xr0"); + asm volatile("xvxor.v $xr7, $xr5, $xr1"); + + /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */ + asm volatile("xvsrli.b $xr4, $xr2, 4"); + asm volatile("xvsrli.b $xr5, $xr3, 4"); + /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */ + asm volatile("xvandi.b $xr0, $xr2, 0x0f"); + asm volatile("xvandi.b $xr1, $xr3, 0x0f"); + /* lookup from pbmul[0] */ + asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0"); + asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1"); + /* lookup from pbmul[16] */ + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); + /* xr0, xr1: A(P + Pxy) */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, 
$xr5"); + + /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */ + asm volatile("xvxor.v $xr0, $xr0, $xr6"); + asm volatile("xvxor.v $xr1, $xr1, $xr7"); + + /* xr2, xr3: P + Pxy + Dx = Dy */ + asm volatile("xvxor.v $xr2, $xr2, $xr0"); + asm volatile("xvxor.v $xr3, $xr3, $xr1"); + + asm volatile("xvst $xr0, %0" : "=m" (dq[0])); + asm volatile("xvst $xr1, %0" : "=m" (dq[32])); + asm volatile("xvst $xr2, %0" : "=m" (dp[0])); + asm volatile("xvst $xr3, %0" : "=m" (dp[32])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + /* xr22, xr23: qmul */ + asm volatile("vld $vr22, %0" : : "m" (qmul[0])); + asm volatile("xvreplve0.q $xr22, $xr22"); + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); + asm volatile("xvreplve0.q $xr23, $xr23"); + + while (bytes) { + /* xr0, xr1: P + Dx */ + asm volatile("xvld $xr0, %0" : : "m" (p[0])); + asm volatile("xvld $xr1, %0" : : "m" (p[32])); + /* xr2, xr3: Qx */ + asm volatile("xvld $xr2, %0" : : "m" (dq[0])); + asm volatile("xvld $xr3, %0" : : "m" (dq[32])); + /* xr2, xr3: Q + Qx */ + asm volatile("xvld $xr4, %0" : : "m" (q[0])); + asm volatile("xvld $xr5, %0" : : "m" (q[32])); + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */ + asm 
volatile("xvsrli.b $xr4, $xr2, 4"); + asm volatile("xvsrli.b $xr5, $xr3, 4"); + /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */ + asm volatile("xvandi.b $xr2, $xr2, 0x0f"); + asm volatile("xvandi.b $xr3, $xr3, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2"); + asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3"); + /* lookup from qmul[16] */ + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); + /* xr2, xr3: qmul(Q + Qx) = Dx */ + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr0, xr1: P + Dx + Dx = P */ + asm volatile("xvxor.v $xr0, $xr0, $xr2"); + asm volatile("xvxor.v $xr1, $xr1, $xr3"); + + asm volatile("xvst $xr2, %0" : "=m" (dq[0])); + asm volatile("xvst $xr3, %0" : "=m" (dq[32])); + asm volatile("xvst $xr0, %0" : "=m" (p[0])); + asm volatile("xvst $xr1, %0" : "=m" (p[32])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_lasx = { + .data2 = raid6_2data_recov_lasx, + .datap = raid6_datap_recov_lasx, + .valid = raid6_has_lasx, + .name = "lasx", + .priority = 2, +}; +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 7b244bce32b3..2abe0076a636 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -65,7 +65,7 @@ else ifeq ($(HAS_ALTIVEC),yes) OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o else ifeq ($(ARCH),loongarch64) - OBJS += loongarch_simd.o + OBJS += loongarch_simd.o recov_loongarch_simd.o endif .c.o: -- cgit v1.2.3 From 9b04c764af18a1dab6d48ca0671f70cdcccf90a2 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Wed, 6 Sep 2023 22:54:16 +0800 Subject: kasan: Add __HAVE_ARCH_SHADOW_MAP to support arch specific mapping MIPS, LoongArch and some other architectures have many holes between different segments and the valid 
address space (256T available) is insufficient to map all these segments to kasan shadow memory with the common formula provided by kasan core. So we need architecture specific mapping formulas to ensure different segments are mapped individually, and only limited space lengths of those specific segments are mapped to shadow. Therefore, when the incoming address is converted to a shadow, we need to add a condition to determine whether it is valid. Reviewed-by: Andrey Konovalov Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- include/linux/kasan.h | 2 ++ mm/kasan/kasan.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 819b6bc8ac08..3df5499f7936 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); +#ifndef __HAVE_ARCH_SHADOW_MAP static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } +#endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 2e973b36fe07..f70e3d7a602e 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -291,16 +291,22 @@ struct kasan_stack_ring { #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) +#ifndef __HAVE_ARCH_SHADOW_MAP static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT); } +#endif static __always_inline bool addr_has_metadata(const void *addr) { +#ifdef __HAVE_ARCH_SHADOW_MAP + return (kasan_mem_to_shadow((void *)addr) != NULL); +#else return (kasan_reset_tag(addr) >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); +#endif } 
/** -- cgit v1.2.3 From 08c6d8bae48c2c28f7017d7b61b5d5a1518ceb39 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Tue, 5 Sep 2023 11:33:15 +0200 Subject: net: phy: Provide Module 4 KSZ9477 errata (DS80000754C) The KSZ9477 errata points out (in 'Module 4') the link up/down problems when EEE (Energy Efficient Ethernet) is enabled in the device to which the KSZ9477 tries to auto negotiate. The suggested workaround is to clear advertisement of EEE for PHYs in this chip driver. To avoid regressions with other switch ICs the new MICREL_NO_EEE flag has been introduced. Moreover, the in-register disablement of the MMD_DEVICE_ID_EEE_ADV.MMD_EEE_ADV MMD register is removed, as this code is both executed too late now (after previous rework of the PHY and DSA for KSZ switches) and not required, since setting all members of the eee_broken_modes bit field prevents the KSZ9477 from advertising EEE. Fixes: 69d3b36ca045 ("net: dsa: microchip: enable EEE support") # for KSZ9477 Signed-off-by: Lukasz Majewski Tested-by: Oleksij Rempel # Confirmed disabled EEE with oscilloscope. Reviewed-by: Oleksij Rempel Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230905093315.784052-1-lukma@denx.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 16 +++++++++++++++- drivers/net/phy/micrel.c | 9 ++++++--- include/linux/micrel_phy.h | 1 + 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 6673122266b7..42db7679c360 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2335,13 +2335,27 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) { struct ksz_device *dev = ds->priv; - if (dev->chip_id == KSZ8830_CHIP_ID) { + switch (dev->chip_id) { + case KSZ8830_CHIP_ID: /* Silicon Errata Sheet (DS80000830A): * Port 1 does not work with LinkMD Cable-Testing.
* Port 1 does not respond to received PAUSE control frames. */ if (!port) return MICREL_KSZ8_P1_ERRATA; + break; + case KSZ9477_CHIP_ID: + /* KSZ9477 Errata DS80000754C + * + * Module 4: Energy Efficient Ethernet (EEE) feature select must + * be manually disabled + * The EEE feature is enabled by default, but it is not fully + * operational. It must be manually disabled through register + * controls. If not disabled, the PHY ports can auto-negotiate + * to enable EEE, and this feature can cause link drops when + * linked to another device supporting EEE. + */ + return MICREL_NO_EEE; } return 0; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index b6d7981b2d1e..927d3d54658e 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1800,9 +1800,6 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = { /* Transmit waveform amplitude can be improved (1000BASE-T, 100BASE-TX, 10BASE-Te) */ {0x1c, 0x04, 0x00d0}, - /* Energy Efficient Ethernet (EEE) feature select must be manually disabled */ - {0x07, 0x3c, 0x0000}, - /* Register settings are required to meet data sheet supply current specifications */ {0x1c, 0x13, 0x6eff}, {0x1c, 0x14, 0xe6ff}, @@ -1847,6 +1844,12 @@ static int ksz9477_config_init(struct phy_device *phydev) return err; } + /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes + * in this switch shall be regarded as broken. 
+ */ + if (phydev->dev_flags & MICREL_NO_EEE) + phydev->eee_broken_modes = -1; + err = genphy_restart_aneg(phydev); if (err) return err; diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 322d87255984..4e27ca7c49de 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -44,6 +44,7 @@ #define MICREL_PHY_50MHZ_CLK BIT(0) #define MICREL_PHY_FXEN BIT(1) #define MICREL_KSZ8_P1_ERRATA BIT(2) +#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC -- cgit v1.2.3 From 6afcf0fb92701487421aa73c692855aa70fbc796 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Sep 2023 11:01:04 -0700 Subject: Revert "net: team: do not use dynamic lockdep key" This reverts commit 39285e124edbc752331e98ace37cc141a6a3747a. Looks like the change has unintended consequences in exposing objects before they are initialized. Let's drop this patch and try again in net-next. Reported-by: syzbot+44ae022028805f4600fc@syzkaller.appspotmail.com Fixes: 39285e124edb ("net: team: do not use dynamic lockdep key") Link: https://lore.kernel.org/all/20230907103124.6adb7256@kernel.org/ Signed-off-by: Jakub Kicinski --- drivers/net/team/team.c | 111 ++++++++++++++++--------------- drivers/net/team/team_mode_loadbalance.c | 4 +- include/linux/if_team.h | 30 +-------- 3 files changed, 60 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index ad29122a5468..e8b94580194e 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1135,8 +1135,8 @@ static int team_port_add(struct team *team, struct net_device *port_dev, struct netlink_ext_ack *extack) { struct net_device *dev = team->dev; - char *portname = port_dev->name; struct team_port *port; + char *portname = port_dev->name; int err; if (port_dev->flags & IFF_LOOPBACK) { @@ -1203,31 +1203,18 @@ static int team_port_add(struct team *team, struct net_device *port_dev, 
memcpy(port->orig.dev_addr, port_dev->dev_addr, port_dev->addr_len); - err = dev_open(port_dev, extack); - if (err) { - netdev_dbg(dev, "Device %s opening failed\n", - portname); - goto err_dev_open; - } - - err = team_upper_dev_link(team, port, extack); + err = team_port_enter(team, port); if (err) { - netdev_err(dev, "Device %s failed to set upper link\n", + netdev_err(dev, "Device %s failed to enter team mode\n", portname); - goto err_set_upper_link; + goto err_port_enter; } - /* lockdep subclass variable(dev->nested_level) was updated by - * team_upper_dev_link(). - */ - team_unlock(team); - team_lock(team); - - err = team_port_enter(team, port); + err = dev_open(port_dev, extack); if (err) { - netdev_err(dev, "Device %s failed to enter team mode\n", + netdev_dbg(dev, "Device %s opening failed\n", portname); - goto err_port_enter; + goto err_dev_open; } err = vlan_vids_add_by_dev(port_dev, dev); @@ -1255,6 +1242,13 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_handler_register; } + err = team_upper_dev_link(team, port, extack); + if (err) { + netdev_err(dev, "Device %s failed to set upper link\n", + portname); + goto err_set_upper_link; + } + err = __team_option_inst_add_port(team, port); if (err) { netdev_err(dev, "Device %s failed to add per-port options\n", @@ -1301,6 +1295,9 @@ err_set_slave_promisc: __team_option_inst_del_port(team, port); err_option_port_add: + team_upper_dev_unlink(team, port); + +err_set_upper_link: netdev_rx_handler_unregister(port_dev); err_handler_register: @@ -1310,16 +1307,13 @@ err_enable_netpoll: vlan_vids_del_by_dev(port_dev, dev); err_vids_add: - team_port_leave(team, port); - -err_port_enter: - team_upper_dev_unlink(team, port); - -err_set_upper_link: dev_close(port_dev); err_dev_open: + team_port_leave(team, port); team_port_set_orig_dev_addr(port); + +err_port_enter: dev_set_mtu(port_dev, port->orig.mtu); err_set_mtu: @@ -1622,7 +1616,6 @@ static int team_init(struct net_device *dev) 
int err; team->dev = dev; - mutex_init(&team->lock); team_set_no_mode(team); team->notifier_ctx = false; @@ -1650,6 +1643,8 @@ static int team_init(struct net_device *dev) goto err_options_register; netif_carrier_off(dev); + lockdep_register_key(&team->team_lock_key); + __mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key); netdev_lockdep_set_classes(dev); return 0; @@ -1670,7 +1665,7 @@ static void team_uninit(struct net_device *dev) struct team_port *port; struct team_port *tmp; - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry_safe(port, tmp, &team->port_list, list) team_port_del(team, port->dev); @@ -1679,8 +1674,9 @@ static void team_uninit(struct net_device *dev) team_mcast_rejoin_fini(team); team_notify_peers_fini(team); team_queue_override_fini(team); - team_unlock(team); + mutex_unlock(&team->lock); netdev_change_features(dev); + lockdep_unregister_key(&team->team_lock_key); } static void team_destructor(struct net_device *dev) @@ -1794,18 +1790,18 @@ static void team_set_rx_mode(struct net_device *dev) static int team_set_mac_address(struct net_device *dev, void *p) { - struct team *team = netdev_priv(dev); struct sockaddr *addr = p; + struct team *team = netdev_priv(dev); struct team_port *port; if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; dev_addr_set(dev, addr->sa_data); - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) if (team->ops.port_change_dev_addr) team->ops.port_change_dev_addr(team, port); - team_unlock(team); + mutex_unlock(&team->lock); return 0; } @@ -1819,7 +1815,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) * Alhough this is reader, it's guarded by team lock. 
It's not possible * to traverse list in reverse under rcu_read_lock */ - team_lock(team); + mutex_lock(&team->lock); team->port_mtu_change_allowed = true; list_for_each_entry(port, &team->port_list, list) { err = dev_set_mtu(port->dev, new_mtu); @@ -1830,7 +1826,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) } } team->port_mtu_change_allowed = false; - team_unlock(team); + mutex_unlock(&team->lock); dev->mtu = new_mtu; @@ -1840,7 +1836,7 @@ unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) dev_set_mtu(port->dev, dev->mtu); team->port_mtu_change_allowed = false; - team_unlock(team); + mutex_unlock(&team->lock); return err; } @@ -1894,20 +1890,20 @@ static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) * Alhough this is reader, it's guarded by team lock. It's not possible * to traverse list in reverse under rcu_read_lock */ - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { err = vlan_vid_add(port->dev, proto, vid); if (err) goto unwind; } - team_unlock(team); + mutex_unlock(&team->lock); return 0; unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - team_unlock(team); + mutex_unlock(&team->lock); return err; } @@ -1917,10 +1913,10 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) struct team *team = netdev_priv(dev); struct team_port *port; - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - team_unlock(team); + mutex_unlock(&team->lock); return 0; } @@ -1942,9 +1938,9 @@ static void team_netpoll_cleanup(struct net_device *dev) { struct team *team = netdev_priv(dev); - team_lock(team); + mutex_lock(&team->lock); __team_netpoll_cleanup(team); - team_unlock(team); + mutex_unlock(&team->lock); } static int team_netpoll_setup(struct net_device *dev, @@ -1954,7 +1950,7 @@ static 
int team_netpoll_setup(struct net_device *dev, struct team_port *port; int err = 0; - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { err = __team_port_enable_netpoll(port); if (err) { @@ -1962,7 +1958,7 @@ static int team_netpoll_setup(struct net_device *dev, break; } } - team_unlock(team); + mutex_unlock(&team->lock); return err; } #endif @@ -1973,9 +1969,9 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev, struct team *team = netdev_priv(dev); int err; - team_lock(team); + mutex_lock(&team->lock); err = team_port_add(team, port_dev, extack); - team_unlock(team); + mutex_unlock(&team->lock); if (!err) netdev_change_features(dev); @@ -1988,12 +1984,19 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev) struct team *team = netdev_priv(dev); int err; - team_lock(team); + mutex_lock(&team->lock); err = team_port_del(team, port_dev); - team_unlock(team); + mutex_unlock(&team->lock); - if (!err) - netdev_change_features(dev); + if (err) + return err; + + if (netif_is_team_master(port_dev)) { + lockdep_unregister_key(&team->team_lock_key); + lockdep_register_key(&team->team_lock_key); + lockdep_set_class(&team->lock, &team->team_lock_key); + } + netdev_change_features(dev); return err; } @@ -2313,13 +2316,13 @@ static struct team *team_nl_team_get(struct genl_info *info) } team = netdev_priv(dev); - __team_lock(team); + mutex_lock(&team->lock); return team; } static void team_nl_team_put(struct team *team) { - team_unlock(team); + mutex_unlock(&team->lock); dev_put(team->dev); } @@ -2981,9 +2984,9 @@ static void team_port_change_check(struct team_port *port, bool linkup) { struct team *team = port->team; - team_lock(team); + mutex_lock(&team->lock); __team_port_change_check(port, linkup); - team_unlock(team); + mutex_unlock(&team->lock); } diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 
7bcc9d37447a..00f8989c29c0 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -478,7 +478,7 @@ static void lb_stats_refresh(struct work_struct *work) team = lb_priv_ex->team; lb_priv = get_lb_priv(team); - if (!team_trylock(team)) { + if (!mutex_trylock(&team->lock)) { schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0); return; } @@ -515,7 +515,7 @@ static void lb_stats_refresh(struct work_struct *work) schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, (lb_priv_ex->stats.refresh_interval * HZ) / 10); - team_unlock(team); + mutex_unlock(&team->lock); } static void lb_stats_refresh_interval_get(struct team *team, diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 12d4447fc8ab..1b9b15a492fa 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -221,38 +221,10 @@ struct team { atomic_t count_pending; struct delayed_work dw; } mcast_rejoin; + struct lock_class_key team_lock_key; long mode_priv[TEAM_MODE_PRIV_LONGS]; }; -static inline void __team_lock(struct team *team) -{ - mutex_lock(&team->lock); -} - -static inline int team_trylock(struct team *team) -{ - return mutex_trylock(&team->lock); -} - -#ifdef CONFIG_LOCKDEP -static inline void team_lock(struct team *team) -{ - ASSERT_RTNL(); - mutex_lock_nested(&team->lock, team->dev->nested_level); -} - -#else -static inline void team_lock(struct team *team) -{ - __team_lock(team); -} -#endif - -static inline void team_unlock(struct team *team) -{ - mutex_unlock(&team->lock); -} - static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, struct sk_buff *skb) { -- cgit v1.2.3 From f94cf2206b066bd6d761d3347fd35f77b828c376 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 7 Sep 2023 09:40:07 -0400 Subject: buffer: Make bh_offset() work for compound pages If the buffer pointed to by the buffer_head is part of a compound page, bh_offset() assumes that b_page is the precise page that contains 
the data. A recent change to jbd2 inadvertently violated that assumption. By using page_size(), we support both b_page being set to the head page (as page_size() will return the size of the entire folio) and the precise page (as it will return PAGE_SIZE for a tail page). Fixes: 8147c4c4546f ("jbd2: use a folio in jbd2_journal_write_metadata_buffer()") Reported-by: Zorro Lang Tested-by: Ritesh Harjani (IBM) Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/buffer_head.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6cb3e9af78c9..4ba242073adc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -173,7 +173,10 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) return test_bit_acquire(BH_Uptodate, &bh->b_state); } -#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) +static inline unsigned long bh_offset(const struct buffer_head *bh) +{ + return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1); +} /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ -- cgit v1.2.3 From 41a5db8d8161457b121a03fde999ff6e00090ee2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 27 Aug 2023 08:27:34 -0700 Subject: bpf: Add support for non-fix-size percpu mem allocation This is needed for later percpu mem allocation when the allocation is done by bpf program. For such cases, a global bpf_global_percpu_ma is added where a flexible allocation size is needed. 
Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20230827152734.1995725-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- kernel/bpf/core.c | 8 +++++--- kernel/bpf/memalloc.c | 14 ++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 024e8b28c34b..440dd1f59a1c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -55,8 +55,8 @@ struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; -extern struct bpf_mem_alloc bpf_global_ma; -extern bool bpf_global_ma_set; +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; +extern bool bpf_global_ma_set, bpf_global_percpu_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 0f8f036d8bd1..95599df82ee4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -64,8 +64,8 @@ #define OFF insn->off #define IMM insn->imm -struct bpf_mem_alloc bpf_global_ma; -bool bpf_global_ma_set; +struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; +bool bpf_global_ma_set, bpf_global_percpu_ma_set; /* No hurry in this branch * @@ -2921,7 +2921,9 @@ static int __init bpf_global_ma_init(void) ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); bpf_global_ma_set = !ret; - return ret; + ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); + bpf_global_percpu_ma_set = !ret; + return !bpf_global_ma_set || !bpf_global_percpu_ma_set; } late_initcall(bpf_global_ma_init); #endif diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 9c49ae53deaf..cb60445de98a 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -499,15 +499,16 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) struct obj_cgroup *objcg = NULL; int cpu, i, unit_size, percpu_size = 0; + 
/* room for llist_node and per-cpu pointer */ + if (percpu) + percpu_size = LLIST_NODE_SZ + sizeof(void *); + if (size) { pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); if (!pc) return -ENOMEM; - if (percpu) - /* room for llist_node and per-cpu pointer */ - percpu_size = LLIST_NODE_SZ + sizeof(void *); - else + if (!percpu) size += LLIST_NODE_SZ; /* room for llist_node */ unit_size = size; @@ -527,10 +528,6 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) return 0; } - /* size == 0 && percpu is an invalid combination */ - if (WARN_ON_ONCE(percpu)) - return -EINVAL; - pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; @@ -543,6 +540,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c = &cc->cache[i]; c->unit_size = sizes[i]; c->objcg = objcg; + c->percpu_size = percpu_size; c->tgt = c; prefill_mem_cache(c, cpu); } -- cgit v1.2.3 From 55db92f42fe4a4ef7b4c2b4960c6212c8512dd53 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 27 Aug 2023 08:27:39 -0700 Subject: bpf: Add BPF_KPTR_PERCPU as a field type BPF_KPTR_PERCPU represents a percpu field type like below struct val_t { ... fields ... }; struct t { ... struct val_t __percpu_kptr *percpu_data_ptr; ... }; where #define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) While BPF_KPTR_REF points to a trusted kernel object or a trusted local object, BPF_KPTR_PERCPU points to a trusted local percpu object. This patch added basic support for BPF_KPTR_PERCPU related to percpu_kptr field parsing, recording and free operations. BPF_KPTR_PERCPU also supports the same map types as BPF_KPTR_REF does. Note that unlike a local kptr, it is possible that a BPF_KPTR_PERCPU struct may not contain any special fields like other kptr, bpf_spin_lock, bpf_list_head, etc. 
Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20230827152739.1996391-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 18 ++++++++++++------ kernel/bpf/btf.c | 5 +++++ kernel/bpf/syscall.c | 4 ++++ 3 files changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 440dd1f59a1c..87eeb3a46a1d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -180,14 +180,15 @@ enum btf_field_type { BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, - BPF_LIST_HEAD = (1 << 4), - BPF_LIST_NODE = (1 << 5), - BPF_RB_ROOT = (1 << 6), - BPF_RB_NODE = (1 << 7), + BPF_KPTR_PERCPU = (1 << 4), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU, + BPF_LIST_HEAD = (1 << 5), + BPF_LIST_NODE = (1 << 6), + BPF_RB_ROOT = (1 << 7), + BPF_RB_NODE = (1 << 8), BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | BPF_RB_NODE | BPF_RB_ROOT, - BPF_REFCOUNT = (1 << 8), + BPF_REFCOUNT = (1 << 9), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_KPTR_PERCPU: + return "percpu_kptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); @@ -389,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field 
*field, void *addr) case BPF_TIMER: case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: break; default: WARN_ON_ONCE(1); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 1095bbe29859..187b57276fec 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3293,6 +3293,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, type = BPF_KPTR_UNREF; else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) type = BPF_KPTR_REF; + else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_PERCPU; else return -EINVAL; @@ -3457,6 +3459,7 @@ static int btf_find_struct_field(const struct btf *btf, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: ret = btf_find_kptr(btf, member_type, off, sz, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3523,6 +3526,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: ret = btf_find_kptr(btf, var_type, off, sz, idx < info_cnt ? 
&info[idx] : &tmp); if (ret < 0) @@ -3783,6 +3787,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); if (ret < 0) goto end; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index eb01c31ed591..6a692f3bea15 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -514,6 +514,7 @@ void btf_record_free(struct btf_record *rec) switch (rec->fields[i].type) { case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: if (rec->fields[i].kptr.module) module_put(rec->fields[i].kptr.module); btf_put(rec->fields[i].kptr.btf); @@ -560,6 +561,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) switch (fields[i].type) { case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: btf_get(fields[i].kptr.btf); if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { ret = -ENXIO; @@ -650,6 +652,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) WRITE_ONCE(*(u64 *)field_ptr, 0); break; case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); if (!xchgd_field) break; @@ -1045,6 +1048,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: case BPF_REFCOUNT: if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_PERCPU_HASH && -- cgit v1.2.3 From 01cc55af93884f1ff5a883426e1924378dfcc62a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 27 Aug 2023 08:27:49 -0700 Subject: bpf: Add bpf_this_cpu_ptr/bpf_per_cpu_ptr support for allocated percpu obj The bpf helpers bpf_this_cpu_ptr() and bpf_per_cpu_ptr() are re-purposed for allocated percpu objects. For an allocated percpu obj, the reg type is 'PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU'. 
The return type for these two re-purposed helpers is 'PTR_TO_MEM | MEM_RCU | MEM_ALLOC'. The MEM_ALLOC allows that the per-cpu data can be read and written. Since the memory allocator bpf_mem_alloc() returns a ptr to a percpu ptr for percpu data, the first argument of bpf_this_cpu_ptr() and bpf_per_cpu_ptr() is patched with a dereference before passing to the helper func. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20230827152749.1997202-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 59 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b6e58dab8e27..a3236651ec64 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -480,6 +480,7 @@ struct bpf_insn_aux_data { bool zext_dst; /* this insn zero extends dst reg */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ bool is_iter_next; /* bpf_iter__next() kfunc call */ + bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6c886ead18f6..6b7e7ca611f3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6221,7 +6221,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, } if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) && - !reg->ref_obj_id) { + !(reg->type & MEM_RCU) && !reg->ref_obj_id) { verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n"); return -EFAULT; } @@ -7765,6 +7765,7 @@ static const struct bpf_reg_types btf_ptr_types = { static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU, + PTR_TO_BTF_ID | 
MEM_PERCPU | MEM_RCU, PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED, } }; @@ -7941,6 +7942,7 @@ found: } break; case PTR_TO_BTF_ID | MEM_PERCPU: + case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU: case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED: /* Handled by helper specific checks */ break; @@ -9547,6 +9549,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn int *insn_idx_p) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); + bool returns_cpu_specific_alloc_ptr = false; const struct bpf_func_proto *fn = NULL; enum bpf_return_type ret_type; enum bpf_type_flag ret_flag; @@ -9785,6 +9788,23 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn break; } + case BPF_FUNC_per_cpu_ptr: + case BPF_FUNC_this_cpu_ptr: + { + struct bpf_reg_state *reg = &regs[BPF_REG_1]; + const struct btf_type *type; + + if (reg->type & MEM_RCU) { + type = btf_type_by_id(reg->btf, reg->btf_id); + if (!type || !btf_type_is_struct(type)) { + verbose(env, "Helper has invalid btf/btf_id in R1\n"); + return -EFAULT; + } + returns_cpu_specific_alloc_ptr = true; + env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true; + } + break; + } case BPF_FUNC_user_ringbuf_drain: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_user_ringbuf_callback_state); @@ -9874,14 +9894,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; regs[BPF_REG_0].mem_size = tsize; } else { - /* MEM_RDONLY may be carried from ret_flag, but it - * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise - * it will confuse the check of PTR_TO_BTF_ID in - * check_mem_access(). - */ - ret_flag &= ~MEM_RDONLY; + if (returns_cpu_specific_alloc_ptr) { + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU; + } else { + /* MEM_RDONLY may be carried from ret_flag, but it + * doesn't apply on PTR_TO_BTF_ID. 
Fold it, otherwise + * it will confuse the check of PTR_TO_BTF_ID in + * check_mem_access(). + */ + ret_flag &= ~MEM_RDONLY; + regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + } - regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; regs[BPF_REG_0].btf = meta.ret_btf; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } @@ -18676,6 +18700,25 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto patch_call_imm; } + /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */ + if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) { + /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data, + * bpf_mem_alloc() returns a ptr to the percpu data ptr. + */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); + insn_buf[1] = *insn; + cnt = 2; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto patch_call_imm; + } + /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup * and other inlining handlers are currently limited to 64 bit * only. -- cgit v1.2.3 From 6fdac58c560e4d164eb8161987bee045147cabe4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 7 Sep 2023 22:19:12 -0400 Subject: tracing: Remove unused trace_event_file dir field Now that eventfs structure is used to create the events directory via the eventfs dynamically allocate code, the "dir" field of the trace_event_file structure is no longer used. Remove it. 
Link: https://lkml.kernel.org/r/20230908022001.580400115@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ajay Kaher Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 1 - kernel/trace/trace_events.c | 13 ------------- 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index eb5c3add939b..12f875e9e69a 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -650,7 +650,6 @@ struct trace_event_file { struct trace_event_call *event_call; struct event_filter __rcu *filter; struct eventfs_file *ef; - struct dentry *dir; struct trace_array *tr; struct trace_subsystem_dir *system; struct list_head triggers; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2af92177b765..065c63991858 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -992,19 +992,6 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) static void remove_event_file_dir(struct trace_event_file *file) { - struct dentry *dir = file->dir; - struct dentry *child; - - if (dir) { - spin_lock(&dir->d_lock); /* probably unneeded */ - list_for_each_entry(child, &dir->d_subdirs, d_child) { - if (d_really_is_positive(child)) /* probably unneeded */ - d_inode(child)->i_private = NULL; - } - spin_unlock(&dir->d_lock); - - tracefs_remove(dir); - } eventfs_remove(file->ef); list_del(&file->list); remove_subsystem(file->system); -- cgit v1.2.3 From 5d153cd128251aaedc8e9657f0a949ec94952055 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 8 Sep 2023 16:34:59 -0500 Subject: spnego: add missing OID to oid registry Add missing OID to the registry. 
Some servers and clients (including Windows) now request "NEGOEX - SPNEGO Extended Negotiation Security". See https://datatracker.ietf.org/doc/html/draft-zhu-negoex-02 Reviewed-by: Namjae Jeon Signed-off-by: Steve French --- include/linux/oid_registry.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 0f4a8903922a..f86a08ba0207 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -67,6 +67,7 @@ enum OID { OID_msOutlookExpress, /* 1.3.6.1.4.1.311.16.4 */ OID_ntlmssp, /* 1.3.6.1.4.1.311.2.2.10 */ + OID_negoex, /* 1.3.6.1.4.1.311.2.2.30 */ OID_spnego, /* 1.3.6.1.5.5.2 */ -- cgit v1.2.3 From 24e0e61db3cb86a66824531989f1df80e0939f26 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 4 Sep 2023 22:42:56 +0200 Subject: ata: libata: disallow dev-initiated LPM transitions to unsupported states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In AHCI 1.3.1, the register description for CAP.SSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Slumber state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Slumber requests." In AHCI 1.3.1, the register description for CAP.PSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Partial state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Partial requests." Ensure that we always set the corresponding bits in PxSCTL.IPM, such that a device is not allowed to initiate transitions to power states which are unsupported by the HBA. 
DevSleep is always initiated by the HBA; however, for completeness, set the corresponding bit in PxSCTL.IPM such that aggressive link power management cannot transition to DevSleep if DevSleep is not supported. sata_link_scr_lpm() is used by libahci, ata_piix and libata-pmp. However, only libahci has the ability to read the CAP/CAP2 register to see if these features are supported. Therefore, in order to not introduce any regressions on ata_piix or libata-pmp, create flags that indicate that the respective feature is NOT supported. This way, the behavior for ata_piix and libata-pmp should remain unchanged. This change is based on a patch originally submitted by Runa Guo-oc. Signed-off-by: Niklas Cassel Fixes: 1152b2617a6e ("libata: implement sata_link_scr_lpm() and make ata_dev_set_feature() global") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 9 +++++++++ drivers/ata/libata-sata.c | 19 ++++++++++++++++--- include/linux/libata.h | 4 ++++ 3 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index abb5911c9d09..08745e7db820 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1883,6 +1883,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) else dev_info(&pdev->dev, "SSS flag set, parallel bus scan disabled\n"); + if (!(hpriv->cap & HOST_CAP_PART)) + host->flags |= ATA_HOST_NO_PART; + + if (!(hpriv->cap & HOST_CAP_SSC)) + host->flags |= ATA_HOST_NO_SSC; + + if (!(hpriv->cap2 & HOST_CAP2_SDS)) + host->flags |= ATA_HOST_NO_DEVSLP; + if (pi.flags & ATA_FLAG_EM) ahci_reset_em(host); diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 5d31c08be013..a701e1538482 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -396,10 +396,23 @@ int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, case ATA_LPM_MED_POWER_WITH_DIPM: case ATA_LPM_MIN_POWER_WITH_PARTIAL: case 
ATA_LPM_MIN_POWER: - if (ata_link_nr_enabled(link) > 0) - /* no restrictions on LPM transitions */ + if (ata_link_nr_enabled(link) > 0) { + /* assume no restrictions on LPM transitions */ scontrol &= ~(0x7 << 8); - else { + + /* + * If the controller does not support partial, slumber, + * or devsleep, then disallow these transitions. + */ + if (link->ap->host->flags & ATA_HOST_NO_PART) + scontrol |= (0x1 << 8); + + if (link->ap->host->flags & ATA_HOST_NO_SSC) + scontrol |= (0x2 << 8); + + if (link->ap->host->flags & ATA_HOST_NO_DEVSLP) + scontrol |= (0x4 << 8); + } else { /* empty port, power off */ scontrol &= ~0xf; scontrol |= (0x1 << 2); diff --git a/include/linux/libata.h b/include/linux/libata.h index 52d58b13e5ee..bf4913f4d7ac 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -222,6 +222,10 @@ enum { ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. */ + ATA_HOST_NO_PART = (1 << 4), /* Host does not support partial */ + ATA_HOST_NO_SSC = (1 << 5), /* Host does not support slumber */ + ATA_HOST_NO_DEVSLP = (1 << 6), /* Host does not support devslp */ + /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ -- cgit v1.2.3 From 2f4d3e293392571e02b106c8b431b638bd029276 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Sep 2023 16:40:32 +0300 Subject: gpio: pca953x: Drop unused fields in struct pca953x_platform_data New code should solely use firmware nodes for the specifics and not any callbacks. 
Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 37 ++++++++--------------------------- include/linux/platform_data/pca953x.h | 13 ------------ 2 files changed, 8 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index bdd50a78e414..02695abd0eb1 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -211,7 +211,6 @@ struct pca953x_chip { struct i2c_client *client; struct gpio_chip gpio_chip; - const char *const *names; unsigned long driver_data; struct regulator *regulator; @@ -712,7 +711,6 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->label = dev_name(&chip->client->dev); gc->parent = &chip->client->dev; gc->owner = THIS_MODULE; - gc->names = chip->names; } #ifdef CONFIG_GPIO_PCA953X_IRQ @@ -998,7 +996,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, } #endif -static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert) +static int device_pca95xx_init(struct pca953x_chip *chip) { DECLARE_BITMAP(val, MAX_LINE); u8 regaddr; @@ -1016,24 +1014,21 @@ static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert) if (ret) goto out; - /* set platform specific polarity inversion */ - if (invert) - bitmap_fill(val, MAX_LINE); - else - bitmap_zero(val, MAX_LINE); + /* clear polarity inversion */ + bitmap_zero(val, MAX_LINE); ret = pca953x_write_regs(chip, chip->regs->invert, val); out: return ret; } -static int device_pca957x_init(struct pca953x_chip *chip, u32 invert) +static int device_pca957x_init(struct pca953x_chip *chip) { DECLARE_BITMAP(val, MAX_LINE); unsigned int i; int ret; - ret = device_pca95xx_init(chip, invert); + ret = device_pca95xx_init(chip); if (ret) goto out; @@ -1054,9 +1049,8 @@ static int pca953x_probe(struct i2c_client *client) { struct pca953x_platform_data *pdata; struct pca953x_chip *chip; - int irq_base = 0; + int irq_base; int ret; - 
u32 invert = 0; struct regulator *reg; const struct regmap_config *regmap_config; @@ -1068,8 +1062,6 @@ static int pca953x_probe(struct i2c_client *client) if (pdata) { irq_base = pdata->irq_base; chip->gpio_start = pdata->gpio_base; - invert = pdata->invert; - chip->names = pdata->names; } else { struct gpio_desc *reset_gpio; @@ -1158,10 +1150,10 @@ static int pca953x_probe(struct i2c_client *client) */ if (PCA_CHIP_TYPE(chip->driver_data) == PCA957X_TYPE) { chip->regs = &pca957x_regs; - ret = device_pca957x_init(chip, invert); + ret = device_pca957x_init(chip); } else { chip->regs = &pca953x_regs; - ret = device_pca95xx_init(chip, invert); + ret = device_pca95xx_init(chip); } if (ret) goto err_exit; @@ -1174,13 +1166,6 @@ static int pca953x_probe(struct i2c_client *client) if (ret) goto err_exit; - if (pdata && pdata->setup) { - ret = pdata->setup(client, chip->gpio_chip.base, - chip->gpio_chip.ngpio, pdata->context); - if (ret < 0) - dev_warn(&client->dev, "setup failed, %d\n", ret); - } - return 0; err_exit: @@ -1190,14 +1175,8 @@ err_exit: static void pca953x_remove(struct i2c_client *client) { - struct pca953x_platform_data *pdata = dev_get_platdata(&client->dev); struct pca953x_chip *chip = i2c_get_clientdata(client); - if (pdata && pdata->teardown) { - pdata->teardown(client, chip->gpio_chip.base, - chip->gpio_chip.ngpio, pdata->context); - } - regulator_disable(chip->regulator); } diff --git a/include/linux/platform_data/pca953x.h b/include/linux/platform_data/pca953x.h index 96c1a14ab365..3c3787c4d96c 100644 --- a/include/linux/platform_data/pca953x.h +++ b/include/linux/platform_data/pca953x.h @@ -11,21 +11,8 @@ struct pca953x_platform_data { /* number of the first GPIO */ unsigned gpio_base; - /* initial polarity inversion setting */ - u32 invert; - /* interrupt base */ int irq_base; - - void *context; /* param to setup/teardown */ - - int (*setup)(struct i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - void (*teardown)(struct 
i2c_client *client, - unsigned gpio, unsigned ngpio, - void *context); - const char *const *names; }; #endif /* _LINUX_PCA953X_H */ -- cgit v1.2.3 From cf8e8658100d4eae80ce9b21f7a81cb024dd5057 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2022 15:54:33 +0200 Subject: arch: Remove Itanium (IA-64) architecture The Itanium architecture is obsolete, and an informal survey [0] reveals that any residual use of Itanium hardware in production is mostly HP-UX or OpenVMS based. The use of Linux on Itanium appears to be limited to enthusiasts that occasionally boot a fresh Linux kernel to see whether things are still working as intended, and perhaps to churn out some distro packages that are rarely used in practice. None of the original companies behind Itanium still produce or support any hardware or software for the architecture, and it is listed as 'Orphaned' in the MAINTAINERS file, as apparently, none of the engineers that contributed on behalf of those companies (nor anyone else, for that matter) have been willing to support or maintain the architecture upstream or even be responsible for applying the odd fix. The Intel firmware team removed all IA-64 support from the Tianocore/EDK2 reference implementation of EFI in 2018. (Itanium is the original architecture for which EFI was developed, and the way Linux supports it deviates significantly from other architectures.) Some distros, such as Debian and Gentoo, still maintain [unofficial] ia64 ports, but many have dropped support years ago. While the argument is being made [1] that there is a 'for the common good' angle to being able to build and run existing projects such as the Grid Community Toolkit [2] on Itanium for interoperability testing, the fact remains that none of those projects are known to be deployed on Linux/ia64, and very few people actually have access to such a system in the first place. 
Even if there were ways imaginable in which Linux/ia64 could be put to good use today, what matters is whether anyone is actually doing that, and this does not appear to be the case. There are no emulators widely available, and so boot testing Itanium is generally infeasible for ordinary contributors. GCC still supports IA-64 but its compile farm [3] no longer has any IA-64 machines. GLIBC would like to get rid of IA-64 [4] too because it would permit some overdue code cleanups. In summary, the benefits to the ecosystem of having IA-64 be part of it are mostly theoretical, whereas the maintenance overhead of keeping it supported is real. So let's rip off the band aid, and remove the IA-64 arch code entirely. This follows the timeline proposed by the Debian/ia64 maintainer [5], which removes support in a controlled manner, leaving IA-64 in a known good state in the most recent LTS release. Other projects will follow once the kernel support is removed. [0] https://lore.kernel.org/all/CAMj1kXFCMh_578jniKpUtx_j8ByHnt=s7S+yQ+vGbKt9ud7+kQ@mail.gmail.com/ [1] https://lore.kernel.org/all/0075883c-7c51-00f5-2c2d-5119c1820410@web.de/ [2] https://gridcf.org/gct-docs/latest/index.html [3] https://cfarm.tetaneutral.net/machines/list/ [4] https://lore.kernel.org/all/87bkiilpc4.fsf@mid.deneb.enyo.de/ [5] https://lore.kernel.org/all/ff58a3e76e5102c94bb5946d99187b358def688a.camel@physik.fu-berlin.de/ Acked-by: Tony Luck Signed-off-by: Ard Biesheuvel --- Documentation/arch/ia64/aliasing.rst | 246 --- Documentation/arch/ia64/efirtc.rst | 144 -- Documentation/arch/ia64/err_inject.rst | 1067 --------- Documentation/arch/ia64/features.rst | 3 - Documentation/arch/ia64/fsys.rst | 303 --- Documentation/arch/ia64/ia64.rst | 49 - Documentation/arch/ia64/index.rst | 19 - Documentation/arch/ia64/irq-redir.rst | 80 - Documentation/arch/ia64/mca.rst | 198 -- Documentation/arch/ia64/serial.rst | 165 -- Documentation/core-api/cpu_hotplug.rst | 6 - MAINTAINERS | 11 - arch/Kconfig | 1 - 
arch/ia64/Kbuild | 3 - arch/ia64/Kconfig | 394 ---- arch/ia64/Kconfig.debug | 55 - arch/ia64/Makefile | 82 - arch/ia64/configs/bigsur_defconfig | 102 - arch/ia64/configs/generic_defconfig | 206 -- arch/ia64/configs/gensparse_defconfig | 184 -- arch/ia64/configs/tiger_defconfig | 169 -- arch/ia64/configs/zx1_defconfig | 148 -- arch/ia64/hp/common/Makefile | 10 - arch/ia64/hp/common/aml_nfw.c | 232 -- arch/ia64/hp/common/sba_iommu.c | 2155 ------------------ arch/ia64/include/asm/Kbuild | 6 - arch/ia64/include/asm/acenv.h | 49 - arch/ia64/include/asm/acpi-ext.h | 17 - arch/ia64/include/asm/acpi.h | 110 - arch/ia64/include/asm/asm-offsets.h | 1 - arch/ia64/include/asm/asm-prototypes.h | 30 - arch/ia64/include/asm/asmmacro.h | 136 -- arch/ia64/include/asm/atomic.h | 216 -- arch/ia64/include/asm/barrier.h | 79 - arch/ia64/include/asm/bitops.h | 453 ---- arch/ia64/include/asm/bug.h | 19 - arch/ia64/include/asm/cache.h | 30 - arch/ia64/include/asm/cacheflush.h | 39 - arch/ia64/include/asm/checksum.h | 63 - arch/ia64/include/asm/clocksource.h | 11 - arch/ia64/include/asm/cmpxchg.h | 33 - arch/ia64/include/asm/cpu.h | 23 - arch/ia64/include/asm/cputime.h | 21 - arch/ia64/include/asm/current.h | 18 - arch/ia64/include/asm/cyclone.h | 16 - arch/ia64/include/asm/delay.h | 89 - arch/ia64/include/asm/device.h | 14 - arch/ia64/include/asm/div64.h | 1 - arch/ia64/include/asm/dma-mapping.h | 16 - arch/ia64/include/asm/dma.h | 17 - arch/ia64/include/asm/dmi.h | 15 - arch/ia64/include/asm/early_ioremap.h | 11 - arch/ia64/include/asm/efi.h | 13 - arch/ia64/include/asm/elf.h | 233 -- arch/ia64/include/asm/emergency-restart.h | 6 - arch/ia64/include/asm/esi.h | 30 - arch/ia64/include/asm/exception.h | 23 - arch/ia64/include/asm/extable.h | 12 - arch/ia64/include/asm/fb.h | 43 - arch/ia64/include/asm/fpswa.h | 74 - arch/ia64/include/asm/ftrace.h | 28 - arch/ia64/include/asm/futex.h | 109 - arch/ia64/include/asm/gcc_intrin.h | 13 - arch/ia64/include/asm/hardirq.h | 27 - 
arch/ia64/include/asm/hugetlb.h | 34 - arch/ia64/include/asm/hw_irq.h | 167 -- arch/ia64/include/asm/idle.h | 8 - arch/ia64/include/asm/intrinsics.h | 13 - arch/ia64/include/asm/io.h | 271 --- arch/ia64/include/asm/iommu.h | 22 - arch/ia64/include/asm/iosapic.h | 106 - arch/ia64/include/asm/irq.h | 37 - arch/ia64/include/asm/irq_regs.h | 1 - arch/ia64/include/asm/irq_remapping.h | 5 - arch/ia64/include/asm/irqflags.h | 95 - arch/ia64/include/asm/kdebug.h | 45 - arch/ia64/include/asm/kexec.h | 46 - arch/ia64/include/asm/kprobes.h | 116 - arch/ia64/include/asm/kregs.h | 166 -- arch/ia64/include/asm/libata-portmap.h | 9 - arch/ia64/include/asm/linkage.h | 19 - arch/ia64/include/asm/local.h | 1 - arch/ia64/include/asm/mca.h | 185 -- arch/ia64/include/asm/mca_asm.h | 245 --- arch/ia64/include/asm/meminit.h | 59 - arch/ia64/include/asm/mman.h | 18 - arch/ia64/include/asm/mmiowb.h | 17 - arch/ia64/include/asm/mmu.h | 14 - arch/ia64/include/asm/mmu_context.h | 194 -- arch/ia64/include/asm/mmzone.h | 35 - arch/ia64/include/asm/module.h | 35 - arch/ia64/include/asm/module.lds.h | 14 - arch/ia64/include/asm/msidef.h | 43 - arch/ia64/include/asm/native/inst.h | 119 - arch/ia64/include/asm/native/irq.h | 20 - arch/ia64/include/asm/native/patchlist.h | 24 - arch/ia64/include/asm/nodedata.h | 63 - arch/ia64/include/asm/numa.h | 83 - arch/ia64/include/asm/page.h | 208 -- arch/ia64/include/asm/pal.h | 1827 --------------- arch/ia64/include/asm/param.h | 18 - arch/ia64/include/asm/parport.h | 20 - arch/ia64/include/asm/patch.h | 28 - arch/ia64/include/asm/pci.h | 66 - arch/ia64/include/asm/percpu.h | 53 - arch/ia64/include/asm/pgalloc.h | 64 - arch/ia64/include/asm/pgtable.h | 545 ----- arch/ia64/include/asm/processor.h | 660 ------ arch/ia64/include/asm/ptrace.h | 146 -- arch/ia64/include/asm/sal.h | 919 -------- arch/ia64/include/asm/sections.h | 33 - arch/ia64/include/asm/serial.h | 17 - arch/ia64/include/asm/shmparam.h | 13 - arch/ia64/include/asm/signal.h | 33 - 
arch/ia64/include/asm/smp.h | 103 - arch/ia64/include/asm/sn/intr.h | 15 - arch/ia64/include/asm/sn/sn_sal.h | 124 -- arch/ia64/include/asm/sparsemem.h | 28 - arch/ia64/include/asm/spinlock.h | 265 --- arch/ia64/include/asm/spinlock_types.h | 22 - arch/ia64/include/asm/string.h | 22 - arch/ia64/include/asm/switch_to.h | 71 - arch/ia64/include/asm/syscall.h | 65 - arch/ia64/include/asm/thread_info.h | 131 -- arch/ia64/include/asm/timex.h | 47 - arch/ia64/include/asm/tlb.h | 50 - arch/ia64/include/asm/tlbflush.h | 128 -- arch/ia64/include/asm/topology.h | 56 - arch/ia64/include/asm/types.h | 32 - arch/ia64/include/asm/uaccess.h | 265 --- arch/ia64/include/asm/uncached.h | 9 - arch/ia64/include/asm/unistd.h | 38 - arch/ia64/include/asm/unwind.h | 234 -- arch/ia64/include/asm/user.h | 53 - arch/ia64/include/asm/ustack.h | 12 - arch/ia64/include/asm/uv/uv.h | 30 - arch/ia64/include/asm/uv/uv_hub.h | 315 --- arch/ia64/include/asm/uv/uv_mmrs.h | 825 ------- arch/ia64/include/asm/vermagic.h | 15 - arch/ia64/include/asm/vga.h | 26 - arch/ia64/include/asm/vmalloc.h | 4 - arch/ia64/include/asm/xor.h | 30 - arch/ia64/include/asm/xtp.h | 46 - arch/ia64/include/uapi/asm/Kbuild | 2 - arch/ia64/include/uapi/asm/auxvec.h | 14 - arch/ia64/include/uapi/asm/bitsperlong.h | 9 - arch/ia64/include/uapi/asm/break.h | 23 - arch/ia64/include/uapi/asm/byteorder.h | 7 - arch/ia64/include/uapi/asm/cmpxchg.h | 138 -- arch/ia64/include/uapi/asm/fcntl.h | 15 - arch/ia64/include/uapi/asm/fpu.h | 67 - arch/ia64/include/uapi/asm/gcc_intrin.h | 619 ------ arch/ia64/include/uapi/asm/ia64regs.h | 101 - arch/ia64/include/uapi/asm/intrinsics.h | 82 - arch/ia64/include/uapi/asm/mman.h | 17 - arch/ia64/include/uapi/asm/param.h | 30 - arch/ia64/include/uapi/asm/posix_types.h | 9 - arch/ia64/include/uapi/asm/ptrace.h | 248 --- arch/ia64/include/uapi/asm/ptrace_offsets.h | 269 --- arch/ia64/include/uapi/asm/resource.h | 8 - arch/ia64/include/uapi/asm/rse.h | 67 - arch/ia64/include/uapi/asm/setup.h | 25 - 
arch/ia64/include/uapi/asm/sigcontext.h | 71 - arch/ia64/include/uapi/asm/siginfo.h | 28 - arch/ia64/include/uapi/asm/signal.h | 98 - arch/ia64/include/uapi/asm/stat.h | 52 - arch/ia64/include/uapi/asm/statfs.h | 21 - arch/ia64/include/uapi/asm/swab.h | 35 - arch/ia64/include/uapi/asm/types.h | 32 - arch/ia64/include/uapi/asm/ucontext.h | 13 - arch/ia64/include/uapi/asm/unistd.h | 22 - arch/ia64/include/uapi/asm/ustack.h | 13 - arch/ia64/install.sh | 30 - arch/ia64/kernel/.gitignore | 3 - arch/ia64/kernel/Makefile | 46 - arch/ia64/kernel/Makefile.gate | 29 - arch/ia64/kernel/acpi-ext.c | 101 - arch/ia64/kernel/acpi.c | 913 -------- arch/ia64/kernel/asm-offsets.c | 289 --- arch/ia64/kernel/audit.c | 63 - arch/ia64/kernel/brl_emu.c | 217 -- arch/ia64/kernel/crash.c | 257 --- arch/ia64/kernel/crash_dump.c | 27 - arch/ia64/kernel/cyclone.c | 125 -- arch/ia64/kernel/dma-mapping.c | 9 - arch/ia64/kernel/efi.c | 1360 ------------ arch/ia64/kernel/efi_stub.S | 87 - arch/ia64/kernel/elfcore.c | 77 - arch/ia64/kernel/entry.S | 1427 ------------ arch/ia64/kernel/entry.h | 83 - arch/ia64/kernel/err_inject.c | 273 --- arch/ia64/kernel/esi.c | 193 -- arch/ia64/kernel/esi_stub.S | 99 - arch/ia64/kernel/fsys.S | 837 ------- arch/ia64/kernel/fsyscall_gtod_data.h | 30 - arch/ia64/kernel/ftrace.c | 196 -- arch/ia64/kernel/gate-data.S | 3 - arch/ia64/kernel/gate.S | 380 ---- arch/ia64/kernel/gate.lds.S | 108 - arch/ia64/kernel/head.S | 1167 ---------- arch/ia64/kernel/iosapic.c | 1137 ---------- arch/ia64/kernel/irq.c | 181 -- arch/ia64/kernel/irq.h | 3 - arch/ia64/kernel/irq_ia64.c | 645 ------ arch/ia64/kernel/irq_lsapic.c | 45 - arch/ia64/kernel/ivt.S | 1688 -------------- arch/ia64/kernel/kprobes.c | 911 -------- arch/ia64/kernel/machine_kexec.c | 163 -- arch/ia64/kernel/mca.c | 2111 ------------------ arch/ia64/kernel/mca_asm.S | 1123 ---------- arch/ia64/kernel/mca_drv.c | 796 ------- arch/ia64/kernel/mca_drv.h | 123 -- arch/ia64/kernel/mca_drv_asm.S | 56 - 
arch/ia64/kernel/minstate.h | 251 --- arch/ia64/kernel/module.c | 959 -------- arch/ia64/kernel/msi_ia64.c | 198 -- arch/ia64/kernel/numa.c | 73 - arch/ia64/kernel/pal.S | 306 --- arch/ia64/kernel/palinfo.c | 942 -------- arch/ia64/kernel/patch.c | 237 -- arch/ia64/kernel/pci-dma.c | 33 - arch/ia64/kernel/perfmon_itanium.h | 116 - arch/ia64/kernel/process.c | 611 ------ arch/ia64/kernel/ptrace.c | 2012 ----------------- arch/ia64/kernel/relocate_kernel.S | 321 --- arch/ia64/kernel/sal.c | 400 ---- arch/ia64/kernel/salinfo.c | 646 ------ arch/ia64/kernel/setup.c | 1081 --------- arch/ia64/kernel/sigframe.h | 26 - arch/ia64/kernel/signal.c | 412 ---- arch/ia64/kernel/smp.c | 335 --- arch/ia64/kernel/smpboot.c | 839 ------- arch/ia64/kernel/stacktrace.c | 40 - arch/ia64/kernel/sys_ia64.c | 197 -- arch/ia64/kernel/syscalls/Makefile | 32 - arch/ia64/kernel/syscalls/syscall.tbl | 375 ---- arch/ia64/kernel/time.c | 463 ---- arch/ia64/kernel/topology.c | 410 ---- arch/ia64/kernel/traps.c | 612 ------ arch/ia64/kernel/unaligned.c | 1560 ------------- arch/ia64/kernel/uncached.c | 273 --- arch/ia64/kernel/unwind.c | 2320 -------------------- arch/ia64/kernel/unwind_decoder.c | 460 ---- arch/ia64/kernel/unwind_i.h | 165 -- arch/ia64/kernel/vmlinux.lds.S | 224 -- arch/ia64/lib/Makefile | 48 - arch/ia64/lib/checksum.c | 102 - arch/ia64/lib/clear_page.S | 79 - arch/ia64/lib/clear_user.S | 212 -- arch/ia64/lib/copy_page.S | 101 - arch/ia64/lib/copy_page_mck.S | 188 -- arch/ia64/lib/copy_user.S | 613 ------ arch/ia64/lib/csum_partial_copy.c | 98 - arch/ia64/lib/do_csum.S | 324 --- arch/ia64/lib/flush.S | 119 - arch/ia64/lib/idiv32.S | 86 - arch/ia64/lib/idiv64.S | 83 - arch/ia64/lib/io.c | 51 - arch/ia64/lib/ip_fast_csum.S | 148 -- arch/ia64/lib/memcpy.S | 304 --- arch/ia64/lib/memcpy_mck.S | 659 ------ arch/ia64/lib/memset.S | 365 --- arch/ia64/lib/strlen.S | 195 -- arch/ia64/lib/strncpy_from_user.S | 47 - arch/ia64/lib/strnlen_user.S | 48 - arch/ia64/lib/xor.S | 181 -- 
arch/ia64/mm/Makefile | 11 - arch/ia64/mm/contig.c | 208 -- arch/ia64/mm/discontig.c | 635 ------ arch/ia64/mm/extable.c | 24 - arch/ia64/mm/fault.c | 251 --- arch/ia64/mm/hugetlbpage.c | 186 -- arch/ia64/mm/init.c | 532 ----- arch/ia64/mm/ioremap.c | 94 - arch/ia64/mm/numa.c | 80 - arch/ia64/mm/tlb.c | 591 ----- arch/ia64/pci/Makefile | 5 - arch/ia64/pci/fixup.c | 80 - arch/ia64/pci/pci.c | 576 ----- arch/ia64/scripts/check-gas | 16 - arch/ia64/scripts/check-gas-asm.S | 2 - arch/ia64/scripts/check-model.c | 1 - arch/ia64/scripts/check-segrel.S | 5 - arch/ia64/scripts/check-segrel.lds | 13 - arch/ia64/scripts/check-serialize.S | 2 - arch/ia64/scripts/check-text-align.S | 7 - arch/ia64/scripts/toolchain-flags | 54 - arch/ia64/scripts/unwcheck.py | 65 - arch/ia64/uv/Makefile | 12 - arch/ia64/uv/kernel/Makefile | 12 - arch/ia64/uv/kernel/setup.c | 120 - drivers/acpi/Kconfig | 6 +- drivers/acpi/numa/Kconfig | 4 +- drivers/acpi/osl.c | 2 +- drivers/char/Kconfig | 4 +- drivers/char/Makefile | 1 - drivers/char/agp/Kconfig | 16 +- drivers/char/agp/Makefile | 2 - drivers/char/agp/hp-agp.c | 550 ----- drivers/char/agp/i460-agp.c | 659 ------ drivers/char/hpet.c | 30 - drivers/char/hw_random/Kconfig | 2 +- drivers/char/mem.c | 12 - drivers/char/mspec.c | 295 --- drivers/cpufreq/Kconfig | 11 - drivers/cpufreq/Makefile | 1 - drivers/cpufreq/ia64-acpi-cpufreq.c | 353 --- drivers/firmware/Kconfig | 24 - drivers/firmware/Makefile | 1 - drivers/firmware/efi/Kconfig | 6 +- drivers/firmware/efi/efi.c | 13 +- drivers/firmware/pcdp.c | 135 -- drivers/firmware/pcdp.h | 108 - drivers/gpu/drm/drm_ioc32.c | 4 +- drivers/input/serio/i8042.h | 2 +- drivers/iommu/Kconfig | 4 +- drivers/iommu/intel/Kconfig | 2 +- drivers/media/cec/platform/Kconfig | 2 +- drivers/misc/Kconfig | 2 +- drivers/misc/sgi-gru/gru.h | 4 +- drivers/misc/sgi-gru/gru_instructions.h | 12 +- drivers/misc/sgi-gru/grufile.c | 72 - drivers/misc/sgi-gru/gruhandles.c | 6 - drivers/misc/sgi-gru/grumain.c | 4 - 
drivers/misc/sgi-xp/xp.h | 2 +- drivers/misc/sgi-xp/xp_uv.c | 24 - drivers/misc/sgi-xp/xpc_main.c | 31 - drivers/misc/sgi-xp/xpc_uv.c | 85 - drivers/net/ethernet/broadcom/tg3.c | 2 +- drivers/net/ethernet/brocade/bna/bnad.h | 1 - .../net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 - drivers/pci/vgaarb.c | 2 +- drivers/tty/serial/8250/Kconfig | 2 +- drivers/tty/vt/keyboard.c | 2 +- drivers/video/fbdev/Kconfig | 2 +- drivers/watchdog/Kconfig | 2 +- fs/Kconfig | 2 +- fs/afs/main.c | 2 - fs/xfs/xfs_ioctl32.h | 2 +- include/linux/acpi.h | 9 +- include/linux/efi.h | 7 - include/linux/mm.h | 2 - include/linux/moduleparam.h | 2 +- include/trace/events/mmflags.h | 2 +- init/Kconfig | 2 +- kernel/cpu.c | 3 - kernel/fork.c | 2 +- kernel/sched/core.c | 29 +- kernel/sysctl.c | 9 - lib/Kconfig.debug | 2 +- lib/decompress_unxz.c | 3 - lib/xz/Kconfig | 5 - mm/mmap.c | 6 +- tools/arch/ia64/include/asm/barrier.h | 59 - tools/arch/ia64/include/uapi/asm/bitsperlong.h | 9 - tools/arch/ia64/include/uapi/asm/mman.h | 7 - usr/include/Makefile | 6 - 357 files changed, 45 insertions(+), 64955 deletions(-) delete mode 100644 Documentation/arch/ia64/aliasing.rst delete mode 100644 Documentation/arch/ia64/efirtc.rst delete mode 100644 Documentation/arch/ia64/err_inject.rst delete mode 100644 Documentation/arch/ia64/features.rst delete mode 100644 Documentation/arch/ia64/fsys.rst delete mode 100644 Documentation/arch/ia64/ia64.rst delete mode 100644 Documentation/arch/ia64/index.rst delete mode 100644 Documentation/arch/ia64/irq-redir.rst delete mode 100644 Documentation/arch/ia64/mca.rst delete mode 100644 Documentation/arch/ia64/serial.rst delete mode 100644 arch/ia64/Kbuild delete mode 100644 arch/ia64/Kconfig delete mode 100644 arch/ia64/Kconfig.debug delete mode 100644 arch/ia64/Makefile delete mode 100644 arch/ia64/configs/bigsur_defconfig delete mode 100644 arch/ia64/configs/generic_defconfig delete mode 100644 arch/ia64/configs/gensparse_defconfig delete mode 100644 
arch/ia64/configs/tiger_defconfig delete mode 100644 arch/ia64/configs/zx1_defconfig delete mode 100644 arch/ia64/hp/common/Makefile delete mode 100644 arch/ia64/hp/common/aml_nfw.c delete mode 100644 arch/ia64/hp/common/sba_iommu.c delete mode 100644 arch/ia64/include/asm/Kbuild delete mode 100644 arch/ia64/include/asm/acenv.h delete mode 100644 arch/ia64/include/asm/acpi-ext.h delete mode 100644 arch/ia64/include/asm/acpi.h delete mode 100644 arch/ia64/include/asm/asm-offsets.h delete mode 100644 arch/ia64/include/asm/asm-prototypes.h delete mode 100644 arch/ia64/include/asm/asmmacro.h delete mode 100644 arch/ia64/include/asm/atomic.h delete mode 100644 arch/ia64/include/asm/barrier.h delete mode 100644 arch/ia64/include/asm/bitops.h delete mode 100644 arch/ia64/include/asm/bug.h delete mode 100644 arch/ia64/include/asm/cache.h delete mode 100644 arch/ia64/include/asm/cacheflush.h delete mode 100644 arch/ia64/include/asm/checksum.h delete mode 100644 arch/ia64/include/asm/clocksource.h delete mode 100644 arch/ia64/include/asm/cmpxchg.h delete mode 100644 arch/ia64/include/asm/cpu.h delete mode 100644 arch/ia64/include/asm/cputime.h delete mode 100644 arch/ia64/include/asm/current.h delete mode 100644 arch/ia64/include/asm/cyclone.h delete mode 100644 arch/ia64/include/asm/delay.h delete mode 100644 arch/ia64/include/asm/device.h delete mode 100644 arch/ia64/include/asm/div64.h delete mode 100644 arch/ia64/include/asm/dma-mapping.h delete mode 100644 arch/ia64/include/asm/dma.h delete mode 100644 arch/ia64/include/asm/dmi.h delete mode 100644 arch/ia64/include/asm/early_ioremap.h delete mode 100644 arch/ia64/include/asm/efi.h delete mode 100644 arch/ia64/include/asm/elf.h delete mode 100644 arch/ia64/include/asm/emergency-restart.h delete mode 100644 arch/ia64/include/asm/esi.h delete mode 100644 arch/ia64/include/asm/exception.h delete mode 100644 arch/ia64/include/asm/extable.h delete mode 100644 arch/ia64/include/asm/fb.h delete mode 100644 
arch/ia64/include/asm/fpswa.h delete mode 100644 arch/ia64/include/asm/ftrace.h delete mode 100644 arch/ia64/include/asm/futex.h delete mode 100644 arch/ia64/include/asm/gcc_intrin.h delete mode 100644 arch/ia64/include/asm/hardirq.h delete mode 100644 arch/ia64/include/asm/hugetlb.h delete mode 100644 arch/ia64/include/asm/hw_irq.h delete mode 100644 arch/ia64/include/asm/idle.h delete mode 100644 arch/ia64/include/asm/intrinsics.h delete mode 100644 arch/ia64/include/asm/io.h delete mode 100644 arch/ia64/include/asm/iommu.h delete mode 100644 arch/ia64/include/asm/iosapic.h delete mode 100644 arch/ia64/include/asm/irq.h delete mode 100644 arch/ia64/include/asm/irq_regs.h delete mode 100644 arch/ia64/include/asm/irq_remapping.h delete mode 100644 arch/ia64/include/asm/irqflags.h delete mode 100644 arch/ia64/include/asm/kdebug.h delete mode 100644 arch/ia64/include/asm/kexec.h delete mode 100644 arch/ia64/include/asm/kprobes.h delete mode 100644 arch/ia64/include/asm/kregs.h delete mode 100644 arch/ia64/include/asm/libata-portmap.h delete mode 100644 arch/ia64/include/asm/linkage.h delete mode 100644 arch/ia64/include/asm/local.h delete mode 100644 arch/ia64/include/asm/mca.h delete mode 100644 arch/ia64/include/asm/mca_asm.h delete mode 100644 arch/ia64/include/asm/meminit.h delete mode 100644 arch/ia64/include/asm/mman.h delete mode 100644 arch/ia64/include/asm/mmiowb.h delete mode 100644 arch/ia64/include/asm/mmu.h delete mode 100644 arch/ia64/include/asm/mmu_context.h delete mode 100644 arch/ia64/include/asm/mmzone.h delete mode 100644 arch/ia64/include/asm/module.h delete mode 100644 arch/ia64/include/asm/module.lds.h delete mode 100644 arch/ia64/include/asm/msidef.h delete mode 100644 arch/ia64/include/asm/native/inst.h delete mode 100644 arch/ia64/include/asm/native/irq.h delete mode 100644 arch/ia64/include/asm/native/patchlist.h delete mode 100644 arch/ia64/include/asm/nodedata.h delete mode 100644 arch/ia64/include/asm/numa.h delete mode 100644 
arch/ia64/include/asm/page.h delete mode 100644 arch/ia64/include/asm/pal.h delete mode 100644 arch/ia64/include/asm/param.h delete mode 100644 arch/ia64/include/asm/parport.h delete mode 100644 arch/ia64/include/asm/patch.h delete mode 100644 arch/ia64/include/asm/pci.h delete mode 100644 arch/ia64/include/asm/percpu.h delete mode 100644 arch/ia64/include/asm/pgalloc.h delete mode 100644 arch/ia64/include/asm/pgtable.h delete mode 100644 arch/ia64/include/asm/processor.h delete mode 100644 arch/ia64/include/asm/ptrace.h delete mode 100644 arch/ia64/include/asm/sal.h delete mode 100644 arch/ia64/include/asm/sections.h delete mode 100644 arch/ia64/include/asm/serial.h delete mode 100644 arch/ia64/include/asm/shmparam.h delete mode 100644 arch/ia64/include/asm/signal.h delete mode 100644 arch/ia64/include/asm/smp.h delete mode 100644 arch/ia64/include/asm/sn/intr.h delete mode 100644 arch/ia64/include/asm/sn/sn_sal.h delete mode 100644 arch/ia64/include/asm/sparsemem.h delete mode 100644 arch/ia64/include/asm/spinlock.h delete mode 100644 arch/ia64/include/asm/spinlock_types.h delete mode 100644 arch/ia64/include/asm/string.h delete mode 100644 arch/ia64/include/asm/switch_to.h delete mode 100644 arch/ia64/include/asm/syscall.h delete mode 100644 arch/ia64/include/asm/thread_info.h delete mode 100644 arch/ia64/include/asm/timex.h delete mode 100644 arch/ia64/include/asm/tlb.h delete mode 100644 arch/ia64/include/asm/tlbflush.h delete mode 100644 arch/ia64/include/asm/topology.h delete mode 100644 arch/ia64/include/asm/types.h delete mode 100644 arch/ia64/include/asm/uaccess.h delete mode 100644 arch/ia64/include/asm/uncached.h delete mode 100644 arch/ia64/include/asm/unistd.h delete mode 100644 arch/ia64/include/asm/unwind.h delete mode 100644 arch/ia64/include/asm/user.h delete mode 100644 arch/ia64/include/asm/ustack.h delete mode 100644 arch/ia64/include/asm/uv/uv.h delete mode 100644 arch/ia64/include/asm/uv/uv_hub.h delete mode 100644 
arch/ia64/include/asm/uv/uv_mmrs.h delete mode 100644 arch/ia64/include/asm/vermagic.h delete mode 100644 arch/ia64/include/asm/vga.h delete mode 100644 arch/ia64/include/asm/vmalloc.h delete mode 100644 arch/ia64/include/asm/xor.h delete mode 100644 arch/ia64/include/asm/xtp.h delete mode 100644 arch/ia64/include/uapi/asm/Kbuild delete mode 100644 arch/ia64/include/uapi/asm/auxvec.h delete mode 100644 arch/ia64/include/uapi/asm/bitsperlong.h delete mode 100644 arch/ia64/include/uapi/asm/break.h delete mode 100644 arch/ia64/include/uapi/asm/byteorder.h delete mode 100644 arch/ia64/include/uapi/asm/cmpxchg.h delete mode 100644 arch/ia64/include/uapi/asm/fcntl.h delete mode 100644 arch/ia64/include/uapi/asm/fpu.h delete mode 100644 arch/ia64/include/uapi/asm/gcc_intrin.h delete mode 100644 arch/ia64/include/uapi/asm/ia64regs.h delete mode 100644 arch/ia64/include/uapi/asm/intrinsics.h delete mode 100644 arch/ia64/include/uapi/asm/mman.h delete mode 100644 arch/ia64/include/uapi/asm/param.h delete mode 100644 arch/ia64/include/uapi/asm/posix_types.h delete mode 100644 arch/ia64/include/uapi/asm/ptrace.h delete mode 100644 arch/ia64/include/uapi/asm/ptrace_offsets.h delete mode 100644 arch/ia64/include/uapi/asm/resource.h delete mode 100644 arch/ia64/include/uapi/asm/rse.h delete mode 100644 arch/ia64/include/uapi/asm/setup.h delete mode 100644 arch/ia64/include/uapi/asm/sigcontext.h delete mode 100644 arch/ia64/include/uapi/asm/siginfo.h delete mode 100644 arch/ia64/include/uapi/asm/signal.h delete mode 100644 arch/ia64/include/uapi/asm/stat.h delete mode 100644 arch/ia64/include/uapi/asm/statfs.h delete mode 100644 arch/ia64/include/uapi/asm/swab.h delete mode 100644 arch/ia64/include/uapi/asm/types.h delete mode 100644 arch/ia64/include/uapi/asm/ucontext.h delete mode 100644 arch/ia64/include/uapi/asm/unistd.h delete mode 100644 arch/ia64/include/uapi/asm/ustack.h delete mode 100755 arch/ia64/install.sh delete mode 100644 arch/ia64/kernel/.gitignore delete mode 
100644 arch/ia64/kernel/Makefile delete mode 100644 arch/ia64/kernel/Makefile.gate delete mode 100644 arch/ia64/kernel/acpi-ext.c delete mode 100644 arch/ia64/kernel/acpi.c delete mode 100644 arch/ia64/kernel/asm-offsets.c delete mode 100644 arch/ia64/kernel/audit.c delete mode 100644 arch/ia64/kernel/brl_emu.c delete mode 100644 arch/ia64/kernel/crash.c delete mode 100644 arch/ia64/kernel/crash_dump.c delete mode 100644 arch/ia64/kernel/cyclone.c delete mode 100644 arch/ia64/kernel/dma-mapping.c delete mode 100644 arch/ia64/kernel/efi.c delete mode 100644 arch/ia64/kernel/efi_stub.S delete mode 100644 arch/ia64/kernel/elfcore.c delete mode 100644 arch/ia64/kernel/entry.S delete mode 100644 arch/ia64/kernel/entry.h delete mode 100644 arch/ia64/kernel/err_inject.c delete mode 100644 arch/ia64/kernel/esi.c delete mode 100644 arch/ia64/kernel/esi_stub.S delete mode 100644 arch/ia64/kernel/fsys.S delete mode 100644 arch/ia64/kernel/fsyscall_gtod_data.h delete mode 100644 arch/ia64/kernel/ftrace.c delete mode 100644 arch/ia64/kernel/gate-data.S delete mode 100644 arch/ia64/kernel/gate.S delete mode 100644 arch/ia64/kernel/gate.lds.S delete mode 100644 arch/ia64/kernel/head.S delete mode 100644 arch/ia64/kernel/iosapic.c delete mode 100644 arch/ia64/kernel/irq.c delete mode 100644 arch/ia64/kernel/irq.h delete mode 100644 arch/ia64/kernel/irq_ia64.c delete mode 100644 arch/ia64/kernel/irq_lsapic.c delete mode 100644 arch/ia64/kernel/ivt.S delete mode 100644 arch/ia64/kernel/kprobes.c delete mode 100644 arch/ia64/kernel/machine_kexec.c delete mode 100644 arch/ia64/kernel/mca.c delete mode 100644 arch/ia64/kernel/mca_asm.S delete mode 100644 arch/ia64/kernel/mca_drv.c delete mode 100644 arch/ia64/kernel/mca_drv.h delete mode 100644 arch/ia64/kernel/mca_drv_asm.S delete mode 100644 arch/ia64/kernel/minstate.h delete mode 100644 arch/ia64/kernel/module.c delete mode 100644 arch/ia64/kernel/msi_ia64.c delete mode 100644 arch/ia64/kernel/numa.c delete mode 100644 
arch/ia64/kernel/pal.S delete mode 100644 arch/ia64/kernel/palinfo.c delete mode 100644 arch/ia64/kernel/patch.c delete mode 100644 arch/ia64/kernel/pci-dma.c delete mode 100644 arch/ia64/kernel/perfmon_itanium.h delete mode 100644 arch/ia64/kernel/process.c delete mode 100644 arch/ia64/kernel/ptrace.c delete mode 100644 arch/ia64/kernel/relocate_kernel.S delete mode 100644 arch/ia64/kernel/sal.c delete mode 100644 arch/ia64/kernel/salinfo.c delete mode 100644 arch/ia64/kernel/setup.c delete mode 100644 arch/ia64/kernel/sigframe.h delete mode 100644 arch/ia64/kernel/signal.c delete mode 100644 arch/ia64/kernel/smp.c delete mode 100644 arch/ia64/kernel/smpboot.c delete mode 100644 arch/ia64/kernel/stacktrace.c delete mode 100644 arch/ia64/kernel/sys_ia64.c delete mode 100644 arch/ia64/kernel/syscalls/Makefile delete mode 100644 arch/ia64/kernel/syscalls/syscall.tbl delete mode 100644 arch/ia64/kernel/time.c delete mode 100644 arch/ia64/kernel/topology.c delete mode 100644 arch/ia64/kernel/traps.c delete mode 100644 arch/ia64/kernel/unaligned.c delete mode 100644 arch/ia64/kernel/uncached.c delete mode 100644 arch/ia64/kernel/unwind.c delete mode 100644 arch/ia64/kernel/unwind_decoder.c delete mode 100644 arch/ia64/kernel/unwind_i.h delete mode 100644 arch/ia64/kernel/vmlinux.lds.S delete mode 100644 arch/ia64/lib/Makefile delete mode 100644 arch/ia64/lib/checksum.c delete mode 100644 arch/ia64/lib/clear_page.S delete mode 100644 arch/ia64/lib/clear_user.S delete mode 100644 arch/ia64/lib/copy_page.S delete mode 100644 arch/ia64/lib/copy_page_mck.S delete mode 100644 arch/ia64/lib/copy_user.S delete mode 100644 arch/ia64/lib/csum_partial_copy.c delete mode 100644 arch/ia64/lib/do_csum.S delete mode 100644 arch/ia64/lib/flush.S delete mode 100644 arch/ia64/lib/idiv32.S delete mode 100644 arch/ia64/lib/idiv64.S delete mode 100644 arch/ia64/lib/io.c delete mode 100644 arch/ia64/lib/ip_fast_csum.S delete mode 100644 arch/ia64/lib/memcpy.S delete mode 100644 
arch/ia64/lib/memcpy_mck.S delete mode 100644 arch/ia64/lib/memset.S delete mode 100644 arch/ia64/lib/strlen.S delete mode 100644 arch/ia64/lib/strncpy_from_user.S delete mode 100644 arch/ia64/lib/strnlen_user.S delete mode 100644 arch/ia64/lib/xor.S delete mode 100644 arch/ia64/mm/Makefile delete mode 100644 arch/ia64/mm/contig.c delete mode 100644 arch/ia64/mm/discontig.c delete mode 100644 arch/ia64/mm/extable.c delete mode 100644 arch/ia64/mm/fault.c delete mode 100644 arch/ia64/mm/hugetlbpage.c delete mode 100644 arch/ia64/mm/init.c delete mode 100644 arch/ia64/mm/ioremap.c delete mode 100644 arch/ia64/mm/numa.c delete mode 100644 arch/ia64/mm/tlb.c delete mode 100644 arch/ia64/pci/Makefile delete mode 100644 arch/ia64/pci/fixup.c delete mode 100644 arch/ia64/pci/pci.c delete mode 100755 arch/ia64/scripts/check-gas delete mode 100644 arch/ia64/scripts/check-gas-asm.S delete mode 100644 arch/ia64/scripts/check-model.c delete mode 100644 arch/ia64/scripts/check-segrel.S delete mode 100644 arch/ia64/scripts/check-segrel.lds delete mode 100644 arch/ia64/scripts/check-serialize.S delete mode 100644 arch/ia64/scripts/check-text-align.S delete mode 100755 arch/ia64/scripts/toolchain-flags delete mode 100644 arch/ia64/scripts/unwcheck.py delete mode 100644 arch/ia64/uv/Makefile delete mode 100644 arch/ia64/uv/kernel/Makefile delete mode 100644 arch/ia64/uv/kernel/setup.c delete mode 100644 drivers/char/agp/hp-agp.c delete mode 100644 drivers/char/agp/i460-agp.c delete mode 100644 drivers/char/mspec.c delete mode 100644 drivers/cpufreq/ia64-acpi-cpufreq.c delete mode 100644 drivers/firmware/pcdp.c delete mode 100644 drivers/firmware/pcdp.h delete mode 100644 tools/arch/ia64/include/asm/barrier.h delete mode 100644 tools/arch/ia64/include/uapi/asm/bitsperlong.h delete mode 100644 tools/arch/ia64/include/uapi/asm/mman.h (limited to 'include/linux') diff --git a/Documentation/arch/ia64/aliasing.rst b/Documentation/arch/ia64/aliasing.rst deleted file mode 100644 index 
36a1e1d4842b..000000000000 --- a/Documentation/arch/ia64/aliasing.rst +++ /dev/null @@ -1,246 +0,0 @@ -================================== -Memory Attribute Aliasing on IA-64 -================================== - -Bjorn Helgaas - -May 4, 2006 - - -Memory Attributes -================= - - Itanium supports several attributes for virtual memory references. - The attribute is part of the virtual translation, i.e., it is - contained in the TLB entry. The ones of most interest to the Linux - kernel are: - - == ====================== - WB Write-back (cacheable) - UC Uncacheable - WC Write-coalescing - == ====================== - - System memory typically uses the WB attribute. The UC attribute is - used for memory-mapped I/O devices. The WC attribute is uncacheable - like UC is, but writes may be delayed and combined to increase - performance for things like frame buffers. - - The Itanium architecture requires that we avoid accessing the same - page with both a cacheable mapping and an uncacheable mapping[1]. - - The design of the chipset determines which attributes are supported - on which regions of the address space. For example, some chipsets - support either WB or UC access to main memory, while others support - only WB access. - -Memory Map -========== - - Platform firmware describes the physical memory map and the - supported attributes for each region. At boot-time, the kernel uses - the EFI GetMemoryMap() interface. ACPI can also describe memory - devices and the attributes they support, but Linux/ia64 currently - doesn't use this information. - - The kernel uses the efi_memmap table returned from GetMemoryMap() to - learn the attributes supported by each region of physical address - space. Unfortunately, this table does not completely describe the - address space because some machines omit some or all of the MMIO - regions from the map. 
- - The kernel maintains another table, kern_memmap, which describes the - memory Linux is actually using and the attribute for each region. - This contains only system memory; it does not contain MMIO space. - - The kern_memmap table typically contains only a subset of the system - memory described by the efi_memmap. Linux/ia64 can't use all memory - in the system because of constraints imposed by the identity mapping - scheme. - - The efi_memmap table is preserved unmodified because the original - boot-time information is required for kexec. - -Kernel Identity Mappings -======================== - - Linux/ia64 identity mappings are done with large pages, currently - either 16MB or 64MB, referred to as "granules." Cacheable mappings - are speculative[2], so the processor can read any location in the - page at any time, independent of the programmer's intentions. This - means that to avoid attribute aliasing, Linux can create a cacheable - identity mapping only when the entire granule supports cacheable - access. - - Therefore, kern_memmap contains only full granule-sized regions that - can be referenced safely by an identity mapping. - - Uncacheable mappings are not speculative, so the processor will - generate UC accesses only to locations explicitly referenced by - software. This allows UC identity mappings to cover granules that - are only partially populated, or populated with a combination of UC - and WB regions. - -User Mappings -============= - - User mappings are typically done with 16K or 64K pages. The smaller - page size allows more flexibility because only 16K or 64K has to be - homogeneous with respect to memory attributes. - -Potential Attribute Aliasing Cases -================================== - - There are several ways the kernel creates new mappings: - -mmap of /dev/mem ----------------- - - This uses remap_pfn_range(), which creates user mappings. These - mappings may be either WB or UC.
If the region being mapped - happens to be in kern_memmap, meaning that it may also be mapped - by a kernel identity mapping, the user mapping must use the same - attribute as the kernel mapping. - - If the region is not in kern_memmap, the user mapping should use - an attribute reported as being supported in the EFI memory map. - - Since the EFI memory map does not describe MMIO on some - machines, this should use an uncacheable mapping as a fallback. - -mmap of /sys/class/pci_bus/.../legacy_mem ------------------------------------------ - - This is very similar to mmap of /dev/mem, except that legacy_mem - only allows mmap of the one megabyte "legacy MMIO" area for a - specific PCI bus. Typically this is the first megabyte of - physical address space, but it may be different on machines with - several VGA devices. - - "X" uses this to access VGA frame buffers. Using legacy_mem - rather than /dev/mem allows multiple instances of X to talk to - different VGA cards. - - The /dev/mem mmap constraints apply. - -mmap of /proc/bus/pci/.../??.? ------------------------------- - - This is an MMIO mmap of PCI functions, which additionally may or - may not be requested as using the WC attribute. - - If WC is requested, and the region in kern_memmap is either WC - or UC, and the EFI memory map designates the region as WC, then - the WC mapping is allowed. - - Otherwise, the user mapping must use the same attribute as the - kernel mapping. - -read/write of /dev/mem ----------------------- - - This uses copy_from_user(), which implicitly uses a kernel - identity mapping. This is obviously safe for things in - kern_memmap. - - There may be corner cases of things that are not in kern_memmap, - but could be accessed this way. For example, registers in MMIO - space are not in kern_memmap, but could be accessed with a UC - mapping. This would not cause attribute aliasing. 
But - registers typically can be accessed only with four-byte or - eight-byte accesses, and the copy_from_user() path doesn't allow - any control over the access size, so this would be dangerous. - -ioremap() ---------- - - This returns a mapping for use inside the kernel. - - If the region is in kern_memmap, we should use the attribute - specified there. - - If the EFI memory map reports that the entire granule supports - WB, we should use that (granules that are partially reserved - or occupied by firmware do not appear in kern_memmap). - - If the granule contains non-WB memory, but we can cover the - region safely with kernel page table mappings, we can use - ioremap_page_range() as most other architectures do. - - Failing all of the above, we have to fall back to a UC mapping. - -Past Problem Cases -================== - -mmap of various MMIO regions from /dev/mem by "X" on Intel platforms --------------------------------------------------------------------- - - The EFI memory map may not report these MMIO regions. - - These must be allowed so that X will work. This means that - when the EFI memory map is incomplete, every /dev/mem mmap must - succeed. It may create either WB or UC user mappings, depending - on whether the region is in kern_memmap or the EFI memory map. - -mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled ----------------------------------------------------------------------- - - The EFI memory map reports the following attributes: - - =============== ======= ================== - 0x00000-0x9FFFF WB only - 0xA0000-0xBFFFF UC only (VGA frame buffer) - 0xC0000-0xFFFFF WB only - =============== ======= ================== - - This mmap is done with user pages, not kernel identity mappings, - so it is safe to use WB mappings. - - The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000, - which uses a granule-sized UC mapping. 
This granule will cover some - WB-only memory, but since UC is non-speculative, the processor will - never generate an uncacheable reference to the WB-only areas unless - the driver explicitly touches them. - -mmap of 0x0-0xFFFFF legacy_mem by "X" -------------------------------------- - - If the EFI memory map reports that the entire range supports the - same attributes, we can allow the mmap (and we will prefer WB if - supported, as is the case with HP sx[12]000 machines with VGA - disabled). - - If EFI reports the range as partly WB and partly UC (as on sx[12]000 - machines with VGA enabled), we must fail the mmap because there's no - safe attribute to use. - - If EFI reports some of the range but not all (as on Intel firmware - that doesn't report the VGA frame buffer at all), we should fail the - mmap and force the user to map just the specific region of interest. - -mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled ------------------------------------------------------------------------- - - The EFI memory map reports the following attributes:: - - 0x00000-0xFFFFF WB only (no VGA MMIO hole) - - This is a special case of the previous case, and the mmap should - fail for the same reason as above. - -read of /sys/devices/.../rom ----------------------------- - - For VGA devices, this may cause an ioremap() of 0xC0000. This - used to be done with a UC mapping, because the VGA frame buffer - at 0xA0000 prevents use of a WB granule. The UC mapping causes - an MCA on HP sx[12]000 chipsets. - - We should use WB page table mappings to avoid covering the VGA - frame buffer. - -Notes -===== - - [1] SDM rev 2.2, vol 2, sec 4.4.1. - [2] SDM rev 2.2, vol 2, sec 4.4.6. 
diff --git a/Documentation/arch/ia64/efirtc.rst b/Documentation/arch/ia64/efirtc.rst deleted file mode 100644 index fd8328408301..000000000000 --- a/Documentation/arch/ia64/efirtc.rst +++ /dev/null @@ -1,144 +0,0 @@ -========================== -EFI Real Time Clock driver -========================== - -S. Eranian - -March 2000 - -1. Introduction -=============== - -This document describes the efirtc.c driver provided for -the IA-64 platform. - -The purpose of this driver is to supply an API for kernel and user applications -to get access to the Time Service offered by EFI version 0.92. - -EFI provides 4 calls one can make once the OS is booted: GetTime(), -SetTime(), GetWakeupTime(), SetWakeupTime() which are all supported by this -driver. We describe those calls as well as the design of the driver in the -following sections. - -2. Design Decisions -=================== - -The original idea was to provide a very simple driver to get access to, -at first, the time of day service. This is required in order to access, in a -portable way, the CMOS clock. A program like /sbin/hwclock uses such a clock -to initialize the system view of the time during boot. - -Because we wanted to minimize the impact on existing user-level apps using -the CMOS clock, we decided to expose an API that was very similar to the one -used today with the legacy RTC driver (drivers/char/rtc.c). However, because -EFI provides simpler services, not all ioctl()s are available. Also -new ioctl()s have been introduced for things that EFI provides but not the -legacy. - -EFI uses a slightly different way of representing the time, notably -the reference date is different. Year uses the full 4-digit format. -The Epoch is January 1st 1998. For backward compatibility reasons we don't -expose this new way of representing time. Instead we use something very -similar to the struct tm, i.e. struct rtc_time, as used by hwclock.
-One of the reasons for doing it this way is to allow for EFI to still evolve -without necessarily impacting any of the user applications. The decoupling -enables flexibility and permits writing wrapper code in case things change. - -The driver exposes two interfaces, one via the device file and a set of -ioctl()s. The other is read-only via the /proc filesystem. - -As of today we don't offer a /proc/sys interface. - -To allow for a uniform interface between the legacy RTC and EFI time service, -we have created the include/linux/rtc.h header file to contain only the -"public" API of the two drivers. The specifics of the legacy RTC are still -in include/linux/mc146818rtc.h. - - -3. Time of day service -====================== - -This part of the driver gives access to the time of day service of EFI. -It provides two ioctl()s, compatible with the legacy RTC calls: - - Read the CMOS clock:: - - ioctl(d, RTC_RD_TIME, &rtc); - - Write the CMOS clock:: - - ioctl(d, RTC_SET_TIME, &rtc); - -The rtc is a pointer to a data structure defined in rtc.h which is close -to a struct tm:: - - struct rtc_time { - int tm_sec; - int tm_min; - int tm_hour; - int tm_mday; - int tm_mon; - int tm_year; - int tm_wday; - int tm_yday; - int tm_isdst; - }; - -The driver takes care of converting back and forth between the EFI time and -this format. - -Those two ioctl()s can be exercised with the hwclock command: - -For reading:: - - # /sbin/hwclock --show - Mon Mar 6 15:32:32 2000 -0.910248 seconds - -For setting:: - - # /sbin/hwclock --systohc - -Root privileges are required to be able to set the time of day. - -4. Wakeup Alarm service -======================= - -EFI provides an API by which one can program when a machine should wake up, -i.e. reboot. This is very different from the alarm provided by the legacy -RTC which is some kind of interval timer alarm. For this reason we don't use -the same ioctl()s to get access to the service. Instead we have -introduced 2 new ioctl()s to the interface of an RTC.
- -We have added 2 new ioctl()s that are specific to the EFI driver: - - Read the current state of the alarm:: - - ioctl(d, RTC_WKALM_RD, &wkt) - - Set the alarm or change its status:: - - ioctl(d, RTC_WKALM_SET, &wkt) - -The wkt structure encapsulates a struct rtc_time + 2 extra fields to get -status information:: - - struct rtc_wkalrm { - - unsigned char enabled; /* =1 if alarm is enabled */ - unsigned char pending; /* =1 if alarm is pending */ - - struct rtc_time time; - } - -As of today, none of the existing user-level apps supports this feature. -However, writing such a program should not be hard using just those two -ioctl()s. - -Root privileges are required to be able to set the alarm. - -5. References -============= - -Check out the following Web site for more information on EFI: - -http://developer.intel.com/technology/efi/ diff --git a/Documentation/arch/ia64/err_inject.rst b/Documentation/arch/ia64/err_inject.rst deleted file mode 100644 index 900f71e93a29..000000000000 --- a/Documentation/arch/ia64/err_inject.rst +++ /dev/null @@ -1,1067 +0,0 @@ -======================================== -IPF Machine Check (MC) error inject tool -======================================== - -IPF Machine Check (MC) error inject tool is used to inject MC -errors from Linux. The tool is a test bed for IPF MC work flow including -hardware correctable error handling, OS recoverable error handling, MC -event logging, etc. - -The tool includes two parts: a kernel driver and a user application -sample. The driver provides an interface to PAL to inject errors -and query error injection capabilities. The driver code is in -arch/ia64/kernel/err_inject.c. The application sample (shown below) -provides a combination of various errors and calls the driver's interface -(sysfs interface) to inject errors or query error injection capabilities. - -The tool can be used to test Intel IPF machine MC handling capabilities.
-It's especially useful for people who cannot access a hardware MC injection -tool to inject errors. It's also very useful to integrate with other -software test suites to do stressful testing on IPF. - -Below is a sample application as part of the whole tool. The sample -can be used as a working test tool. Or it can be expanded to include -more features. It can also be integrated into a library or other user -application for more thorough testing. - -The sample application takes err.conf as error configuration input. GCC -compiles the code. After you install the err_inject driver, you can run -this sample application to inject errors. - -Errata: Itanium 2 Processors Specification Update lists some errata against -the pal_mc_error_inject PAL procedure. The following err.conf has been tested -on the latest Montecito PAL. - -err.conf:: - - #This is configuration file for err_inject_tool. - #The format of the each line is: - #cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer - #where - # cpu: logical cpu number the error will be inject in. - # loop: times the error will be injected. - # interval: In second. every so often one error is injected. - # err_type_info, err_struct_info: PAL parameters. - # - #Note: All values are hex w/o or w/ 0x prefix. - - - #On cpu2, inject only total 0x10 errors, interval 5 seconds - #corrected, data cache, hier-2, physical addr(assigned by tool code). - #working on Montecito latest PAL. - 2, 10, 5, 4101, 95 - - #On cpu4, inject and consume total 0x10 errors, interval 5 seconds - #corrected, data cache, hier-2, physical addr(assigned by tool code). - #working on Montecito latest PAL. - 4, 10, 5, 4109, 95 - - #On cpu15, inject and consume total 0x10 errors, interval 5 seconds - #recoverable, DTR0, hier-2. - #working on Montecito latest PAL.
- 0xf, 0x10, 5, 4249, 15 - -The sample application source code: - -err_injection_tool.c:: - - /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Copyright (C) 2006 Intel Co - * Fenghua Yu - * - */ - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - - #define MAX_FN_SIZE 256 - #define MAX_BUF_SIZE 256 - #define DATA_BUF_SIZE 256 - #define NR_CPUS 512 - #define MAX_TASK_NUM 2048 - #define MIN_INTERVAL 5 // seconds - #define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte. - #define PARA_FIELD_NUM 5 - #define MASK_SIZE (NR_CPUS/64) - #define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/" - - int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask); - - int verbose; - #define vbprintf if (verbose) printf - - int log_info(int cpu, const char *fmt, ...) 
- { - FILE *log; - char fn[MAX_FN_SIZE]; - char buf[MAX_BUF_SIZE]; - va_list args; - - sprintf(fn, "%d.log", cpu); - log=fopen(fn, "a+"); - if (log==NULL) { - perror("Error open:"); - return -1; - } - - va_start(args, fmt); - vprintf(fmt, args); - memset(buf, 0, MAX_BUF_SIZE); - vsprintf(buf, fmt, args); - va_end(args); - - fwrite(buf, sizeof(buf), 1, log); - fclose(log); - - return 0; - } - - typedef unsigned long u64; - typedef unsigned int u32; - - typedef union err_type_info_u { - struct { - u64 mode : 3, /* 0-2 */ - err_inj : 3, /* 3-5 */ - err_sev : 2, /* 6-7 */ - err_struct : 5, /* 8-12 */ - struct_hier : 3, /* 13-15 */ - reserved : 48; /* 16-63 */ - } err_type_info_u; - u64 err_type_info; - } err_type_info_t; - - typedef union err_struct_info_u { - struct { - u64 siv : 1, /* 0 */ - c_t : 2, /* 1-2 */ - cl_p : 3, /* 3-5 */ - cl_id : 3, /* 6-8 */ - cl_dp : 1, /* 9 */ - reserved1 : 22, /* 10-31 */ - tiv : 1, /* 32 */ - trigger : 4, /* 33-36 */ - trigger_pl : 3, /* 37-39 */ - reserved2 : 24; /* 40-63 */ - } err_struct_info_cache; - struct { - u64 siv : 1, /* 0 */ - tt : 2, /* 1-2 */ - tc_tr : 2, /* 3-4 */ - tr_slot : 8, /* 5-12 */ - reserved1 : 19, /* 13-31 */ - tiv : 1, /* 32 */ - trigger : 4, /* 33-36 */ - trigger_pl : 3, /* 37-39 */ - reserved2 : 24; /* 40-63 */ - } err_struct_info_tlb; - struct { - u64 siv : 1, /* 0 */ - regfile_id : 4, /* 1-4 */ - reg_num : 7, /* 5-11 */ - reserved1 : 20, /* 12-31 */ - tiv : 1, /* 32 */ - trigger : 4, /* 33-36 */ - trigger_pl : 3, /* 37-39 */ - reserved2 : 24; /* 40-63 */ - } err_struct_info_register; - struct { - u64 reserved; - } err_struct_info_bus_processor_interconnect; - u64 err_struct_info; - } err_struct_info_t; - - typedef union err_data_buffer_u { - struct { - u64 trigger_addr; /* 0-63 */ - u64 inj_addr; /* 64-127 */ - u64 way : 5, /* 128-132 */ - index : 20, /* 133-152 */ - : 39; /* 153-191 */ - } err_data_buffer_cache; - struct { - u64 trigger_addr; /* 0-63 */ - u64 inj_addr; /* 64-127 */ - u64 way : 5, /* 
128-132 */ - index : 20, /* 133-152 */ - reserved : 39; /* 153-191 */ - } err_data_buffer_tlb; - struct { - u64 trigger_addr; /* 0-63 */ - } err_data_buffer_register; - struct { - u64 reserved; /* 0-63 */ - } err_data_buffer_bus_processor_interconnect; - u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; - } err_data_buffer_t; - - typedef union capabilities_u { - struct { - u64 i : 1, - d : 1, - rv : 1, - tag : 1, - data : 1, - mesi : 1, - dp : 1, - reserved1 : 3, - pa : 1, - va : 1, - wi : 1, - reserved2 : 20, - trigger : 1, - trigger_pl : 1, - reserved3 : 30; - } capabilities_cache; - struct { - u64 d : 1, - i : 1, - rv : 1, - tc : 1, - tr : 1, - reserved1 : 27, - trigger : 1, - trigger_pl : 1, - reserved2 : 30; - } capabilities_tlb; - struct { - u64 gr_b0 : 1, - gr_b1 : 1, - fr : 1, - br : 1, - pr : 1, - ar : 1, - cr : 1, - rr : 1, - pkr : 1, - dbr : 1, - ibr : 1, - pmc : 1, - pmd : 1, - reserved1 : 3, - regnum : 1, - reserved2 : 15, - trigger : 1, - trigger_pl : 1, - reserved3 : 30; - } capabilities_register; - struct { - u64 reserved; - } capabilities_bus_processor_interconnect; - } capabilities_t; - - typedef struct resources_s { - u64 ibr0 : 1, - ibr2 : 1, - ibr4 : 1, - ibr6 : 1, - dbr0 : 1, - dbr2 : 1, - dbr4 : 1, - dbr6 : 1, - reserved : 48; - } resources_t; - - - long get_page_size(void) - { - long page_size=sysconf(_SC_PAGESIZE); - return page_size; - } - - #define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size()) - #define SHM_SIZE (2*PAGE_SIZE*NR_CPUS) - #define SHM_VA 0x2000000100000000 - - int shmid; - void *shmaddr; - - int create_shm(void) - { - key_t key; - char fn[MAX_FN_SIZE]; - - /* cpu0 is always existing */ - sprintf(fn, PATH_FORMAT, 0); - if ((key = ftok(fn, 's')) == -1) { - perror("ftok"); - return -1; - } - - shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT); - if (shmid == -1) { - if (errno==EEXIST) { - shmid = shmget(key, SHM_SIZE, 0); - if (shmid == -1) { - perror("shmget"); - return -1; - } - } - else { - perror("shmget"); - return -1; - } 
- } - vbprintf("shmid=%d", shmid); - - /* connect to the segment: */ - shmaddr = shmat(shmid, (void *)SHM_VA, 0); - if (shmaddr == (void*)-1) { - perror("shmat"); - return -1; - } - - memset(shmaddr, 0, SHM_SIZE); - mlock(shmaddr, SHM_SIZE); - - return 0; - } - - int free_shm() - { - munlock(shmaddr, SHM_SIZE); - shmdt(shmaddr); - semctl(shmid, 0, IPC_RMID); - - return 0; - } - - #ifdef _SEM_SEMUN_UNDEFINED - union semun - { - int val; - struct semid_ds *buf; - unsigned short int *array; - struct seminfo *__buf; - }; - #endif - - u32 mode=1; /* 1: physical mode; 2: virtual mode. */ - int one_lock=1; - key_t key[NR_CPUS]; - int semid[NR_CPUS]; - - int create_sem(int cpu) - { - union semun arg; - char fn[MAX_FN_SIZE]; - int sid; - - sprintf(fn, PATH_FORMAT, cpu); - sprintf(fn, "%s/%s", fn, "err_type_info"); - if ((key[cpu] = ftok(fn, 'e')) == -1) { - perror("ftok"); - return -1; - } - - if (semid[cpu]!=0) - return 0; - - /* clear old semaphore */ - if ((sid = semget(key[cpu], 1, 0)) != -1) - semctl(sid, 0, IPC_RMID); - - /* get one semaphore */ - if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) { - perror("semget"); - printf("Please remove semaphore with key=0x%lx, then run the tool.\n", - (u64)key[cpu]); - return -1; - } - - vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu, - (u64)key[cpu]); - /* initialize the semaphore to 1: */ - arg.val = 1; - if (semctl(semid[cpu], 0, SETVAL, arg) == -1) { - perror("semctl"); - return -1; - } - - return 0; - } - - static int lock(int cpu) - { - struct sembuf lock; - - lock.sem_num = cpu; - lock.sem_op = 1; - semop(semid[cpu], &lock, 1); - - return 0; - } - - static int unlock(int cpu) - { - struct sembuf unlock; - - unlock.sem_num = cpu; - unlock.sem_op = -1; - semop(semid[cpu], &unlock, 1); - - return 0; - } - - void free_sem(int cpu) - { - semctl(semid[cpu], 0, IPC_RMID); - } - - int wr_multi(char *fn, unsigned long *data, int size) - { - int fd; - char buf[MAX_BUF_SIZE]; - int ret; - - if 
(size==1) - sprintf(buf, "%lx", *data); - else if (size==3) - sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]); - else { - fprintf(stderr,"write to file with wrong size!\n"); - return -1; - } - - fd=open(fn, O_RDWR); - if (!fd) { - perror("Error:"); - return -1; - } - ret=write(fd, buf, sizeof(buf)); - close(fd); - return ret; - } - - int wr(char *fn, unsigned long data) - { - return wr_multi(fn, &data, 1); - } - - int rd(char *fn, unsigned long *data) - { - int fd; - char buf[MAX_BUF_SIZE]; - - fd=open(fn, O_RDONLY); - if (fd<0) { - perror("Error:"); - return -1; - } - read(fd, buf, MAX_BUF_SIZE); - *data=strtoul(buf, NULL, 16); - close(fd); - return 0; - } - - int rd_status(char *path, int *status) - { - char fn[MAX_FN_SIZE]; - sprintf(fn, "%s/status", path); - if (rd(fn, (u64*)status)<0) { - perror("status reading error.\n"); - return -1; - } - - return 0; - } - - int rd_capabilities(char *path, u64 *capabilities) - { - char fn[MAX_FN_SIZE]; - sprintf(fn, "%s/capabilities", path); - if (rd(fn, capabilities)<0) { - perror("capabilities reading error.\n"); - return -1; - } - - return 0; - } - - int rd_all(char *path) - { - unsigned long err_type_info, err_struct_info, err_data_buffer; - int status; - unsigned long capabilities, resources; - char fn[MAX_FN_SIZE]; - - sprintf(fn, "%s/err_type_info", path); - if (rd(fn, &err_type_info)<0) { - perror("err_type_info reading error.\n"); - return -1; - } - printf("err_type_info=%lx\n", err_type_info); - - sprintf(fn, "%s/err_struct_info", path); - if (rd(fn, &err_struct_info)<0) { - perror("err_struct_info reading error.\n"); - return -1; - } - printf("err_struct_info=%lx\n", err_struct_info); - - sprintf(fn, "%s/err_data_buffer", path); - if (rd(fn, &err_data_buffer)<0) { - perror("err_data_buffer reading error.\n"); - return -1; - } - printf("err_data_buffer=%lx\n", err_data_buffer); - - sprintf(fn, "%s/status", path); - if (rd("status", (u64*)&status)<0) { - perror("status reading error.\n"); - return -1; - } - 
printf("status=%d\n", status); - - sprintf(fn, "%s/capabilities", path); - if (rd(fn,&capabilities)<0) { - perror("capabilities reading error.\n"); - return -1; - } - printf("capabilities=%lx\n", capabilities); - - sprintf(fn, "%s/resources", path); - if (rd(fn, &resources)<0) { - perror("resources reading error.\n"); - return -1; - } - printf("resources=%lx\n", resources); - - return 0; - } - - int query_capabilities(char *path, err_type_info_t err_type_info, - u64 *capabilities) - { - char fn[MAX_FN_SIZE]; - err_struct_info_t err_struct_info; - err_data_buffer_t err_data_buffer; - - err_struct_info.err_struct_info=0; - memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8); - - sprintf(fn, "%s/err_type_info", path); - wr(fn, err_type_info.err_type_info); - sprintf(fn, "%s/err_struct_info", path); - wr(fn, 0x0); - sprintf(fn, "%s/err_data_buffer", path); - wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); - - // Fire pal_mc_error_inject procedure. - sprintf(fn, "%s/call_start", path); - wr(fn, mode); - - if (rd_capabilities(path, capabilities)<0) - return -1; - - return 0; - } - - int query_all_capabilities() - { - int status; - err_type_info_t err_type_info; - int err_sev, err_struct, struct_hier; - int cap=0; - u64 capabilities; - char path[MAX_FN_SIZE]; - - err_type_info.err_type_info=0; // Initial - err_type_info.err_type_info_u.mode=0; // Query mode; - err_type_info.err_type_info_u.err_inj=0; - - printf("All capabilities implemented in pal_mc_error_inject:\n"); - sprintf(path, PATH_FORMAT ,0); - for (err_sev=0;err_sev<3;err_sev++) - for (err_struct=0;err_struct<5;err_struct++) - for (struct_hier=0;struct_hier<5;struct_hier++) - { - status=-1; - capabilities=0; - err_type_info.err_type_info_u.err_sev=err_sev; - err_type_info.err_type_info_u.err_struct=err_struct; - err_type_info.err_type_info_u.struct_hier=struct_hier; - - if (query_capabilities(path, err_type_info, &capabilities)<0) - continue; - - if (rd_status(path, 
&status)<0) - continue; - - if (status==0) { - cap=1; - printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ", - err_sev, err_struct, struct_hier); - printf("capabilities 0x%lx\n", capabilities); - } - } - if (!cap) { - printf("No capabilities supported.\n"); - return 0; - } - - return 0; - } - - int err_inject(int cpu, char *path, err_type_info_t err_type_info, - err_struct_info_t err_struct_info, - err_data_buffer_t err_data_buffer) - { - int status; - char fn[MAX_FN_SIZE]; - - log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ", - err_type_info.err_type_info, - err_struct_info.err_struct_info); - log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n", - err_data_buffer.err_data_buffer[0], - err_data_buffer.err_data_buffer[1], - err_data_buffer.err_data_buffer[2]); - sprintf(fn, "%s/err_type_info", path); - wr(fn, err_type_info.err_type_info); - sprintf(fn, "%s/err_struct_info", path); - wr(fn, err_struct_info.err_struct_info); - sprintf(fn, "%s/err_data_buffer", path); - wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); - - // Fire pal_mc_error_inject procedure. 
- sprintf(fn, "%s/call_start", path); - wr(fn,mode); - - if (rd_status(path, &status)<0) { - vbprintf("fail: read status\n"); - return -100; - } - - if (status!=0) { - log_info(cpu, "fail: status=%d\n", status); - return status; - } - - return status; - } - - static int construct_data_buf(char *path, err_type_info_t err_type_info, - err_struct_info_t err_struct_info, - err_data_buffer_t *err_data_buffer, - void *va1) - { - char fn[MAX_FN_SIZE]; - u64 virt_addr=0, phys_addr=0; - - vbprintf("va1=%lx\n", (u64)va1); - memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8); - - switch (err_type_info.err_type_info_u.err_struct) { - case 1: // Cache - switch (err_struct_info.err_struct_info_cache.cl_id) { - case 1: //Virtual addr - err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1; - break; - case 2: //Phys addr - sprintf(fn, "%s/virtual_to_phys", path); - virt_addr=(u64)va1; - if (wr(fn,virt_addr)<0) - return -1; - rd(fn, &phys_addr); - err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr; - break; - default: - printf("Not supported cl_id\n"); - break; - } - break; - case 2: // TLB - break; - case 3: // Register file - break; - case 4: // Bus/system interconnect - default: - printf("Not supported err_struct\n"); - break; - } - - return 0; - } - - typedef struct { - u64 cpu; - u64 loop; - u64 interval; - u64 err_type_info; - u64 err_struct_info; - u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; - } parameters_t; - - parameters_t line_para; - int para; - - static int empty_data_buffer(u64 *err_data_buffer) - { - int empty=1; - int i; - - for (i=0;iMIN_INTERVAL - ?interval:MIN_INTERVAL; - parameters[num].err_type_info=err_type_info_conf; - parameters[num].err_struct_info=err_struct_info_conf; - memcpy(parameters[num++].err_data_buffer, - err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ; - - if (num>=MAX_TASK_NUM) - break; - } - } - else { - parameters[0].cpu=line_para.cpu; - parameters[0].loop=line_para.loop; - parameters[0].interval= 
line_para.interval>MIN_INTERVAL - ?line_para.interval:MIN_INTERVAL; - parameters[0].err_type_info=line_para.err_type_info; - parameters[0].err_struct_info=line_para.err_struct_info; - memcpy(parameters[0].err_data_buffer, - line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ; - - num=1; - } - - /* Create semaphore: If one_lock, one semaphore for all processors. - Otherwise, one semaphore for each processor. */ - if (one_lock) { - if (create_sem(0)) { - printf("Can not create semaphore...exit\n"); - free_sem(0); - return -1; - } - } - else { - for (i=0;i - -Using the "epc" instruction effectively introduces a new mode of -execution to the ia64 linux kernel. We call this mode the -"fsys-mode". To recap, the normal states of execution are: - - - kernel mode: - Both the register stack and the memory stack have been - switched over to kernel memory. The user-level state is saved - in a pt-regs structure at the top of the kernel memory stack. - - - user mode: - Both the register stack and the kernel stack are in - user memory. The user-level state is contained in the - CPU registers. - - - bank 0 interruption-handling mode: - This is the non-interruptible state which all - interruption-handlers start execution in. The user-level - state remains in the CPU registers and some kernel state may - be stored in bank 0 of registers r16-r31. 
- -In contrast, fsys-mode has the following special properties: - - - execution is at privilege level 0 (most-privileged) - - - CPU registers may contain a mixture of user-level and kernel-level - state (it is the responsibility of the kernel to ensure that no - security-sensitive kernel-level state is leaked back to - user-level) - - - execution is interruptible and preemptible (an fsys-mode handler - can disable interrupts and avoid all other interruption-sources - to avoid preemption) - - - neither the memory-stack nor the register-stack can be trusted while - in fsys-mode (they point to the user-level stacks, which may - be invalid, or completely bogus addresses) - -In summary, fsys-mode is much more similar to running in user-mode -than it is to running in kernel-mode. Of course, given that the -privilege level is at level 0, this means that fsys-mode requires some -care (see below). - - -How to tell fsys-mode -===================== - -Linux operates in fsys-mode when (a) the privilege level is 0 (most -privileged) and (b) the stacks have NOT been switched to kernel memory -yet. For convenience, the header file <asm-ia64/ptrace.h> provides -three macros:: - - user_mode(regs) - user_stack(task,regs) - fsys_mode(task,regs) - -The "regs" argument is a pointer to a pt_regs structure. The "task" -argument is a pointer to the task structure to which the "regs" -pointer belongs. user_mode() returns TRUE if the CPU state pointed -to by "regs" was executing in user mode (privilege level 3). -user_stack() returns TRUE if the state pointed to by "regs" was -executing on the user-level stack(s). Finally, fsys_mode() returns -TRUE if the CPU state pointed to by "regs" was executing in fsys-mode. -The fsys_mode() macro is equivalent to the expression:: - - !user_mode(regs) && user_stack(task,regs) - -How to write an fsyscall handler -================================ - -The file arch/ia64/kernel/fsys.S contains a table of fsyscall-handlers -(fsyscall_table).
This table contains one entry for each system call. -By default, a system call is handled by fsys_fallback_syscall(). This -routine takes care of entering (full) kernel mode and calling the -normal Linux system call handler. For performance-critical system -calls, it is possible to write a hand-tuned fsyscall_handler. For -example, fsys.S contains fsys_getpid(), which is a hand-tuned version -of the getpid() system call. - -The entry and exit-state of an fsyscall handler is as follows: - -Machine state on entry to fsyscall handler ------------------------------------------- - - ========= =============================================================== - r10 0 - r11 saved ar.pfs (a user-level value) - r15 system call number - r16 "current" task pointer (in normal kernel-mode, this is in r13) - r32-r39 system call arguments - b6 return address (a user-level value) - ar.pfs previous frame-state (a user-level value) - PSR.be cleared to zero (i.e., little-endian byte order is in effect) - - all other registers may contain values passed in from user-mode - ========= =============================================================== - -Required machine state on exit to fsyscall handler --------------------------------------------------- - - ========= =========================================================== - r11 saved ar.pfs (as passed into the fsyscall handler) - r15 system call number (as passed into the fsyscall handler) - r32-r39 system call arguments (as passed into the fsyscall handler) - b6 return address (as passed into the fsyscall handler) - ar.pfs previous frame-state (as passed into the fsyscall handler) - ========= =========================================================== - -Fsyscall handlers can execute with very little overhead, but with that -speed comes a set of restrictions: - - * Fsyscall-handlers MUST check for any pending work in the flags - member of the thread-info structure and if any of the - TIF_ALLWORK_MASK flags are set, the handler needs to 
fall back on - doing a full system call (by calling fsys_fallback_syscall). - - * Fsyscall-handlers MUST preserve incoming arguments (r32-r39, r11, - r15, b6, and ar.pfs) because they will be needed in case of a - system call restart. Of course, all "preserved" registers also - must be preserved, in accordance with the normal calling conventions. - - * Fsyscall-handlers MUST check argument registers for containing a - NaT value before using them in any way that could trigger a - NaT-consumption fault. If a system call argument is found to - contain a NaT value, an fsyscall-handler may return immediately - with r8=EINVAL, r10=-1. - - * Fsyscall-handlers MUST NOT use the "alloc" instruction or perform - any other operation that would trigger mandatory RSE - (register-stack engine) traffic. - - * Fsyscall-handlers MUST NOT write to any stacked registers because - it is not safe to assume that user-level called a handler with the - proper number of arguments. - - * Fsyscall-handlers need to be careful when accessing per-CPU variables: - unless proper safe-guards are taken (e.g., interruptions are avoided), - execution may be pre-empted and resumed on another CPU at any given - time. - - * Fsyscall-handlers must be careful not to leak sensitive kernel - information back to user-level. In particular, before returning to - user-level, care needs to be taken to clear any scratch registers - that could contain sensitive information (note that the current - task pointer is not considered sensitive: it's already exposed - through ar.k6). - - * Fsyscall-handlers MUST NOT access user-memory without first - validating access-permission (this can be done typically via - probe.r.fault and/or probe.w.fault) and without guarding against - memory access exceptions (this can be done with the EX() macros - defined by asmmacro.h). - -The above restrictions may seem draconian, but remember that it's -possible to trade off some of the restrictions by paying a slightly -higher overhead.
For example, if an fsyscall-handler could benefit -from the shadow register bank, it could temporarily disable PSR.i and -PSR.ic, switch to bank 0 (bsw.0) and then use the shadow registers as -needed. In other words, following the above rules yields extremely -fast system call execution (while fully preserving system call -semantics), but there is also a lot of flexibility in handling more -complicated cases. - -Signal handling -=============== - -The delivery of (asynchronous) signals must be delayed until fsys-mode -is exited. This is accomplished with the help of the lower-privilege -transfer trap: arch/ia64/kernel/process.c:do_notify_resume_user() -checks whether the interrupted task was in fsys-mode and, if so, sets -PSR.lp and returns immediately. When fsys-mode is exited via the -"br.ret" instruction that lowers the privilege level, a trap will -occur. The trap handler clears PSR.lp again and returns immediately. -The kernel exit path then checks for and delivers any pending signals. - -PSR Handling -============ - -The "epc" instruction doesn't change the contents of PSR at all. This -is in contrast to a regular interruption, which clears almost all -bits. Because of that, some care needs to be taken to ensure things -work as expected. The following discussion describes how each PSR bit -is handled. - -======= ======================================================================= -PSR.be Cleared when entering fsys-mode. A srlz.d instruction is used - to ensure the CPU is in little-endian mode before the first - load/store instruction is executed. PSR.be is normally NOT - restored upon return from an fsys-mode handler. In other - words, user-level code must not rely on PSR.be being preserved - across a system call. -PSR.up Unchanged. -PSR.ac Unchanged. -PSR.mfl Unchanged. Note: fsys-mode handlers must not write-registers! -PSR.mfh Unchanged. Note: fsys-mode handlers must not write-registers! -PSR.ic Unchanged. 
Note: fsys-mode handlers can clear the bit, if needed. -PSR.i Unchanged. Note: fsys-mode handlers can clear the bit, if needed. -PSR.pk Unchanged. -PSR.dt Unchanged. -PSR.dfl Unchanged. Note: fsys-mode handlers must not write-registers! -PSR.dfh Unchanged. Note: fsys-mode handlers must not write-registers! -PSR.sp Unchanged. -PSR.pp Unchanged. -PSR.di Unchanged. -PSR.si Unchanged. -PSR.db Unchanged. The kernel prevents user-level from setting a hardware - breakpoint that triggers at any privilege level other than - 3 (user-mode). -PSR.lp Unchanged. -PSR.tb Lazy redirect. If a taken-branch trap occurs while in - fsys-mode, the trap-handler modifies the saved machine state - such that execution resumes in the gate page at - syscall_via_break(), with privilege level 3. Note: the - taken branch would occur on the branch invoking the - fsyscall-handler, at which point, by definition, a syscall - restart is still safe. If the system call number is invalid, - the fsys-mode handler will return directly to user-level. This - return will trigger a taken-branch trap, but since the trap is - taken _after_ restoring the privilege level, the CPU has already - left fsys-mode, so no special treatment is needed. -PSR.rt Unchanged. -PSR.cpl Cleared to 0. -PSR.is Unchanged (guaranteed to be 0 on entry to the gate page). -PSR.mc Unchanged. -PSR.it Unchanged (guaranteed to be 1). -PSR.id Unchanged. Note: the ia64 linux kernel never sets this bit. -PSR.da Unchanged. Note: the ia64 linux kernel never sets this bit. -PSR.dd Unchanged. Note: the ia64 linux kernel never sets this bit. -PSR.ss Lazy redirect. If set, "epc" will cause a Single Step Trap to - be taken. The trap handler then modifies the saved machine - state such that execution resumes in the gate page at - syscall_via_break(), with privilege level 3. -PSR.ri Unchanged. -PSR.ed Unchanged. Note: This bit could only have an effect if an fsys-mode - handler performed a speculative load that gets NaTted. 
If so, this - would be the normal & expected behavior, so no special treatment is - needed. -PSR.bn Unchanged. Note: fsys-mode handlers may clear the bit, if needed. - Doing so requires clearing PSR.i and PSR.ic as well. -PSR.ia Unchanged. Note: the ia64 linux kernel never sets this bit. -======= ======================================================================= - -Using fast system calls -======================= - -To use fast system calls, userspace applications need simply call -__kernel_syscall_via_epc(). For example: - --- example fgettimeofday() call -- - --- fgettimeofday.S -- - -:: - - #include <asm/asmmacro.h> - - GLOBAL_ENTRY(fgettimeofday) - .prologue - .save ar.pfs, r11 - mov r11 = ar.pfs - .body - - mov r2 = 0xa000000000020660;; // gate address - // found by inspection of System.map for the - // __kernel_syscall_via_epc() function. See - // below for how to do this for real. - - mov b7 = r2 - mov r15 = 1087 // gettimeofday syscall - ;; - br.call.sptk.many b6 = b7 - ;; - - .restore sp - - mov ar.pfs = r11 - br.ret.sptk.many rp;; // return to caller - END(fgettimeofday) - --- end fgettimeofday.S -- - -In reality, getting the gate address is accomplished by two extra -values passed via the ELF auxiliary vector (include/asm-ia64/elf.h) - - * AT_SYSINFO : is the address of __kernel_syscall_via_epc() - * AT_SYSINFO_EHDR : is the address of the kernel gate ELF DSO - -The ELF DSO is a pre-linked library that is mapped in by the kernel at -the gate page. It is a proper ELF shared object so, with a dynamic -loader that recognises the library, you should be able to make calls to -the exported functions within it as with any other shared library. -AT_SYSINFO points into the kernel DSO at the -__kernel_syscall_via_epc() function for historical reasons (it was -used before the kernel DSO) and as a convenience.
diff --git a/Documentation/arch/ia64/ia64.rst b/Documentation/arch/ia64/ia64.rst deleted file mode 100644 index b725019a9492..000000000000 --- a/Documentation/arch/ia64/ia64.rst +++ /dev/null @@ -1,49 +0,0 @@ -=========================================== -Linux kernel release for the IA-64 Platform -=========================================== - - These are the release notes for Linux since version 2.4 for IA-64 - platform. This document provides information specific to IA-64 - ONLY, to get additional information about the Linux kernel also - read the original Linux README provided with the kernel. - -Installing the Kernel -===================== - - - IA-64 kernel installation is the same as the other platforms, see - original README for details. - - -Software Requirements -===================== - - Compiling and running this kernel requires an IA-64 compliant GCC - compiler. And various software packages also compiled with an - IA-64 compliant GCC compiler. - - -Configuring the Kernel -====================== - - Configuration is the same, see original README for details. - - -Compiling the Kernel: - - - Compiling this kernel doesn't differ from other platform so read - the original README for details BUT make sure you have an IA-64 - compliant GCC compiler. - -IA-64 Specifics -=============== - - - General issues: - - * Hardly any performance tuning has been done. Obvious targets - include the library routines (IP checksum, etc.). Less - obvious targets include making sure we don't flush the TLB - needlessly, etc. - - * SMP locks cleanup/optimization - - * IA32 support. Currently experimental. It mostly works. diff --git a/Documentation/arch/ia64/index.rst b/Documentation/arch/ia64/index.rst deleted file mode 100644 index 761f2154dfa2..000000000000 --- a/Documentation/arch/ia64/index.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -================== -IA-64 Architecture -================== - -.. 
toctree:: - :maxdepth: 1 - - ia64 - aliasing - efirtc - err_inject - fsys - irq-redir - mca - serial - - features diff --git a/Documentation/arch/ia64/irq-redir.rst b/Documentation/arch/ia64/irq-redir.rst deleted file mode 100644 index 6bbbbe4f73ef..000000000000 --- a/Documentation/arch/ia64/irq-redir.rst +++ /dev/null @@ -1,80 +0,0 @@ -============================== -IRQ affinity on IA64 platforms -============================== - -07.01.2002, Erich Focht - - -By writing to /proc/irq/IRQ#/smp_affinity the interrupt routing can be -controlled. The behavior on IA64 platforms is slightly different from -that described in Documentation/core-api/irq/irq-affinity.rst for i386 systems. - -Because of the usage of SAPIC mode and physical destination mode the -IRQ target is one particular CPU and cannot be a mask of several -CPUs. Only the first non-zero bit is taken into account. - - -Usage examples -============== - -The target CPU has to be specified as a hexadecimal CPU mask. The -first non-zero bit is the selected CPU. This format has been kept for -compatibility reasons with i386. - -Set the delivery mode of interrupt 41 to fixed and route the -interrupts to CPU #3 (logical CPU number) (2^3=0x08):: - - echo "8" >/proc/irq/41/smp_affinity - -Set the default route for IRQ number 41 to CPU 6 in lowest priority -delivery mode (redirectable):: - - echo "r 40" >/proc/irq/41/smp_affinity - -The output of the command:: - - cat /proc/irq/IRQ#/smp_affinity - -gives the target CPU mask for the specified interrupt vector. If the CPU -mask is preceded by the character "r", the interrupt is redirectable -(i.e. lowest priority mode routing is used), otherwise its route is -fixed. 
- - - -Initialization and default behavior -=================================== - -If the platform features IRQ redirection (info provided by SAL) all -IO-SAPIC interrupts are initialized with CPU#0 as their default target -and the routing is the so called "lowest priority mode" (actually -fixed SAPIC mode with hint). The XTP chipset registers are used as hints -for the IRQ routing. Currently in Linux XTP registers can have three -values: - - - minimal for an idle task, - - normal if any other task runs, - - maximal if the CPU is going to be switched off. - -The IRQ is routed to the CPU with lowest XTP register value, the -search begins at the default CPU. Therefore most of the interrupts -will be handled by CPU #0. - -If the platform doesn't feature interrupt redirection IOSAPIC fixed -routing is used. The target CPUs are distributed in a round robin -manner. IRQs will be routed only to the selected target CPUs. Check -with:: - - cat /proc/interrupts - - - -Comments -======== - -On large (multi-node) systems it is recommended to route the IRQs to -the node to which the corresponding device is connected. -For systems like the NEC AzusA we get IRQ node-affinity for free. This -is because usually the chipsets on each node redirect the interrupts -only to their own CPUs (as they cannot see the XTP registers on the -other nodes). diff --git a/Documentation/arch/ia64/mca.rst b/Documentation/arch/ia64/mca.rst deleted file mode 100644 index 08270bba44a4..000000000000 --- a/Documentation/arch/ia64/mca.rst +++ /dev/null @@ -1,198 +0,0 @@ -============================================================= -An ad-hoc collection of notes on IA64 MCA and INIT processing -============================================================= - -Feel free to update it with notes about any area that is not clear. - ---- - -MCA/INIT are completely asynchronous. They can occur at any time, when -the OS is in any state. Including when one of the cpus is already -holding a spinlock. 
Trying to get any lock from MCA/INIT state is -asking for deadlock. Also the state of structures that are protected -by locks is indeterminate, including linked lists. - ---- - -The complicated ia64 MCA process. All of this is mandated by Intel's -specification for ia64 SAL, error recovery and unwind, it is not as -if we have a choice here. - -* MCA occurs on one cpu, usually due to a double bit memory error. - This is the monarch cpu. - -* SAL sends an MCA rendezvous interrupt (which is a normal interrupt) - to all the other cpus, the slaves. - -* Slave cpus that receive the MCA interrupt call down into SAL, they - end up spinning disabled while the MCA is being serviced. - -* If any slave cpu was already spinning disabled when the MCA occurred - then it cannot service the MCA interrupt. SAL waits ~20 seconds then - sends an unmaskable INIT event to the slave cpus that have not - already rendezvoused. - -* Because MCA/INIT can be delivered at any time, including when the cpu - is down in PAL in physical mode, the registers at the time of the - event are _completely_ undefined. In particular the MCA/INIT - handlers cannot rely on the thread pointer, PAL physical mode can - (and does) modify TP. It is allowed to do that as long as it resets - TP on return. However MCA/INIT events expose us to these PAL - internal TP changes. Hence curr_task(). - -* If an MCA/INIT event occurs while the kernel was running (not user - space) and the kernel has called PAL then the MCA/INIT handler cannot - assume that the kernel stack is in a fit state to be used. Mainly - because PAL may or may not maintain the stack pointer internally. - Because the MCA/INIT handlers cannot trust the kernel stack, they - have to use their own, per-cpu stacks. The MCA/INIT stacks are - preformatted with just enough task state to let the relevant handlers - do their job. - -* Unlike most other architectures, the ia64 struct task is embedded in - the kernel stack[1]. 
So switching to a new kernel stack means that - we switch to a new task as well. Because various bits of the kernel - assume that current points into the struct task, switching to a new - stack also means a new value for current. - -* Once all slaves have rendezvoused and are spinning disabled, the - monarch is entered. The monarch now tries to diagnose the problem - and decide if it can recover or not. - -* Part of the monarch's job is to look at the state of all the other - tasks. The only way to do that on ia64 is to call the unwinder, - as mandated by Intel. - -* The starting point for the unwind depends on whether a task is - running or not. That is, whether it is on a cpu or is blocked. The - monarch has to determine whether or not a task is on a cpu before it - knows how to start unwinding it. The tasks that received an MCA or - INIT event are no longer running, they have been converted to blocked - tasks. But (and it's a big but), the cpus that received the MCA - rendezvous interrupt are still running on their normal kernel stacks! - -* To distinguish between these two cases, the monarch must know which - tasks are on a cpu and which are not. Hence each slave cpu that - switches to an MCA/INIT stack, registers its new stack using - set_curr_task(), so the monarch can tell that the _original_ task is - no longer running on that cpu. That gives us a decent chance of - getting a valid backtrace of the _original_ task. - -* MCA/INIT can be nested, to a depth of 2 on any cpu. In the case of a - nested error, we want diagnostics on the MCA/INIT handler that - failed, not on the task that was originally running. Again this - requires set_curr_task() so the MCA/INIT handlers can register their - own stack as running on that cpu. Then a recursive error gets a - trace of the failing handler's "task". - -[1] - My (Keith Owens) original design called for ia64 to separate its - struct task and the kernel stacks.
Then the MCA/INIT data would be - chained stacks like i386 interrupt stacks. But that required - radical surgery on the rest of ia64, plus extra hard wired TLB - entries with its associated performance degradation. David - Mosberger vetoed that approach. Which meant that separate kernel - stacks meant separate "tasks" for the MCA/INIT handlers. - ---- - -INIT is less complicated than MCA. Pressing the nmi button or using -the equivalent command on the management console sends INIT to all -cpus. SAL picks one of the cpus as the monarch and the rest are -slaves. All the OS INIT handlers are entered at approximately the same -time. The OS monarch prints the state of all tasks and returns, after -which the slaves return and the system resumes. - -At least that is what is supposed to happen. Alas there are broken -versions of SAL out there. Some drive all the cpus as monarchs. Some -drive them all as slaves. Some drive one cpu as monarch, wait for that -cpu to return from the OS then drive the rest as slaves. Some versions -of SAL cannot even cope with returning from the OS, they spin inside -SAL on resume. The OS INIT code has workarounds for some of these -broken SAL symptoms, but some simply cannot be fixed from the OS side. - ---- - -The scheduler hooks used by ia64 (curr_task, set_curr_task) are layer -violations. Unfortunately MCA/INIT start off as massive layer -violations (can occur at _any_ time) and they build from there. - -At least ia64 makes an attempt at recovering from hardware errors, but -it is a difficult problem because of the asynchronous nature of these -errors. When processing an unmaskable interrupt we sometimes need -special code to cope with our inability to take any locks. - ---- - -How is ia64 MCA/INIT different from x86 NMI? - -* x86 NMI typically gets delivered to one cpu. MCA/INIT gets sent to - all cpus. - -* x86 NMI cannot be nested. MCA/INIT can be nested, to a depth of 2 - per cpu. 
- -* x86 has a separate struct task which points to one of multiple kernel - stacks. ia64 has the struct task embedded in the single kernel - stack, so switching stack means switching task. - -* x86 does not call the BIOS so the NMI handler does not have to worry - about any registers having changed. MCA/INIT can occur while the cpu - is in PAL in physical mode, with undefined registers and an undefined - kernel stack. - -* i386 backtrace is not very sensitive to whether a process is running - or not. ia64 unwind is very, very sensitive to whether a process is - running or not. - ---- - -What happens when MCA/INIT is delivered while a cpu is running user -space code? - -The user mode registers are stored in the RSE area of the MCA/INIT on -entry to the OS and are restored from there on return to SAL, so user -mode registers are preserved across a recoverable MCA/INIT. Since the -OS has no idea what unwind data is available for the user space stack, -MCA/INIT never tries to backtrace user space. Which means that the OS -does not bother making the user space process look like a blocked task, -i.e. the OS does not copy pt_regs and switch_stack to the user space -stack. Also the OS has no idea how big the user space RSE and memory -stacks are, which makes it too risky to copy the saved state to a user -mode stack. - ---- - -How do we get a backtrace on the tasks that were running when MCA/INIT -was delivered? - -mca.c:::ia64_mca_modify_original_stack(). That identifies and -verifies the original kernel stack, copies the dirty registers from -the MCA/INIT stack's RSE to the original stack's RSE, copies the -skeleton struct pt_regs and switch_stack to the original stack, fills -in the skeleton structures from the PAL minstate area and updates the -original stack's thread.ksp. That makes the original stack look -exactly like any other blocked task, i.e. it now appears to be -sleeping.
To get a backtrace, just start with thread.ksp for the -original task and unwind like any other sleeping task. - ---- - -How do we identify the tasks that were running when MCA/INIT was -delivered? - -If the previous task has been verified and converted to a blocked -state, then sos->prev_task on the MCA/INIT stack is updated to point to -the previous task. You can look at that field in dumps or debuggers. -To help distinguish between the handler and the original tasks, -handlers have _TIF_MCA_INIT set in thread_info.flags. - -The sos data is always in the MCA/INIT handler stack, at offset -MCA_SOS_OFFSET. You can get that value from mca_asm.h or calculate it -as KERNEL_STACK_SIZE - sizeof(struct pt_regs) - sizeof(struct -ia64_sal_os_state), with 16 byte alignment for all structures. - -Also the comm field of the MCA/INIT task is modified to include the pid -of the original task, for humans to use. For example, a comm field of -'MCA 12159' means that pid 12159 was running when the MCA was -delivered. diff --git a/Documentation/arch/ia64/serial.rst b/Documentation/arch/ia64/serial.rst deleted file mode 100644 index 1de70c305a79..000000000000 --- a/Documentation/arch/ia64/serial.rst +++ /dev/null @@ -1,165 +0,0 @@ -============== -Serial Devices -============== - -Serial Device Naming -==================== - - As of 2.6.10, serial devices on ia64 are named based on the - order of ACPI and PCI enumeration. The first device in the - ACPI namespace (if any) becomes /dev/ttyS0, the second becomes - /dev/ttyS1, etc., and PCI devices are named sequentially - starting after the ACPI devices. - - Prior to 2.6.10, there were confusing exceptions to this: - - - Firmware on some machines (mostly from HP) provides an HCDP - table[1] that tells the kernel about devices that can be used - as a serial console. If the user specified "console=ttyS0" - or the EFI ConOut path contained only UART devices, the - kernel registered the device described by the HCDP as - /dev/ttyS0. 
- - - If there was no HCDP, we assumed there were UARTs at the - legacy COM port addresses (I/O ports 0x3f8 and 0x2f8), so - the kernel registered those as /dev/ttyS0 and /dev/ttyS1. - - Any additional ACPI or PCI devices were registered sequentially - after /dev/ttyS0 as they were discovered. - - With an HCDP, device names changed depending on EFI configuration - and "console=" arguments. Without an HCDP, device names didn't - change, but we registered devices that might not really exist. - - For example, an HP rx1600 with a single built-in serial port - (described in the ACPI namespace) plus an MP[2] (a PCI device) has - these ports: - - ========== ========== ============ ============ ======= - Type MMIO pre-2.6.10 pre-2.6.10 2.6.10+ - address - (EFI console (EFI console - on builtin) on MP port) - ========== ========== ============ ============ ======= - builtin 0xff5e0000 ttyS0 ttyS1 ttyS0 - MP UPS 0xf8031000 ttyS1 ttyS2 ttyS1 - MP Console 0xf8030000 ttyS2 ttyS0 ttyS2 - MP 2 0xf8030010 ttyS3 ttyS3 ttyS3 - MP 3 0xf8030038 ttyS4 ttyS4 ttyS4 - ========== ========== ============ ============ ======= - -Console Selection -================= - - EFI knows what your console devices are, but it doesn't tell the - kernel quite enough to actually locate them. The DIG64 HCDP - table[1] does tell the kernel where potential serial console - devices are, but not all firmware supplies it. Also, EFI supports - multiple simultaneous consoles and doesn't tell the kernel which - should be the "primary" one. - - So how do you tell Linux which console device to use? - - - If your firmware supplies the HCDP, it is simplest to - configure EFI with a single device (either a UART or a VGA - card) as the console. Then you don't need to tell Linux - anything; the kernel will automatically use the EFI console. - - (This works only in 2.6.6 or later; prior to that you had - to specify "console=ttyS0" to get a serial console.) 
- - - Without an HCDP, Linux defaults to a VGA console unless you - specify a "console=" argument. - - NOTE: Don't assume that a serial console device will be /dev/ttyS0. - It might be ttyS1, ttyS2, etc. Make sure you have the appropriate - entries in /etc/inittab (for getty) and /etc/securetty (to allow - root login). - -Early Serial Console -==================== - - The kernel can't start using a serial console until it knows where - the device lives. Normally this happens when the driver enumerates - all the serial devices, which can happen a minute or more after the - kernel starts booting. - - 2.6.10 and later kernels have an "early uart" driver that works - very early in the boot process. The kernel will automatically use - this if the user supplies an argument like "console=uart,io,0x3f8", - or if the EFI console path contains only a UART device and the - firmware supplies an HCDP. - -Troubleshooting Serial Console Problems -======================================= - - No kernel output after elilo prints "Uncompressing Linux... done": - - - You specified "console=ttyS0" but Linux changed the device - to which ttyS0 refers. Configure exactly one EFI console - device[3] and remove the "console=" option. - - - The EFI console path contains both a VGA device and a UART. - EFI and elilo use both, but Linux defaults to VGA. Remove - the VGA device from the EFI console path[3]. - - - Multiple UARTs selected as EFI console devices. EFI and - elilo use all selected devices, but Linux uses only one. - Make sure only one UART is selected in the EFI console - path[3]. - - - You're connected to an HP MP port[2] but have a non-MP UART - selected as EFI console device. EFI uses the MP as a - console device even when it isn't explicitly selected. - Either move the console cable to the non-MP UART, or change - the EFI console path[3] to the MP UART. - - Long pause (60+ seconds) between "Uncompressing Linux... 
done" and - start of kernel output: - - - No early console because you used "console=ttyS". Remove - the "console=" option if your firmware supplies an HCDP. - - - If you don't have an HCDP, the kernel doesn't know where - your console lives until the driver discovers serial - devices. Use "console=uart,io,0x3f8" (or appropriate - address for your machine). - - Kernel and init script output works fine, but no "login:" prompt: - - - Add getty entry to /etc/inittab for console tty. Look for - the "Adding console on ttyS" message that tells you which - device is the console. - - "login:" prompt, but can't login as root: - - - Add entry to /etc/securetty for console tty. - - No ACPI serial devices found in 2.6.17 or later: - - - Turn on CONFIG_PNP and CONFIG_PNPACPI. Prior to 2.6.17, ACPI - serial devices were discovered by 8250_acpi. In 2.6.17, - 8250_acpi was replaced by the combination of 8250_pnp and - CONFIG_PNPACPI. - - - -[1] - http://www.dig64.org/specifications/agreement - The table was originally defined as the "HCDP" for "Headless - Console/Debug Port." The current version is the "PCDP" for - "Primary Console and Debug Port Devices." - -[2] - The HP MP (management processor) is a PCI device that provides - several UARTs. One of the UARTs is often used as a console; the - EFI Boot Manager identifies it as "Acpi(HWP0002,700)/Pci(...)/Uart". - The external connection is usually a 25-pin connector, and a - special dongle converts that to three 9-pin connectors, one of - which is labelled "Console." - -[3] - EFI console devices are configured using the EFI Boot Manager - "Boot option maintenance" menu. You may have to interrupt the - boot sequence to use this menu, and you will have to reset the - box after changing console configuration. 
diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index 9511e405aabd..dcb0e379e5e8 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -40,12 +40,6 @@ Command Line Switches supplied here is lower than the number of physically available CPUs, then those CPUs can not be brought online later. -``additional_cpus=n`` - Use this to limit hotpluggable CPUs. This option sets - ``cpu_possible_mask = cpu_present_mask + additional_cpus`` - - This option is limited to the IA64 architecture. - ``possible_cpus=n`` This option sets ``possible_cpus`` bits in ``cpu_possible_mask``. diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d297..aac200bb0aca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9935,12 +9935,6 @@ F: Documentation/driver-api/i3c F: drivers/i3c/ F: include/linux/i3c/ -IA64 (Itanium) PLATFORM -L: linux-ia64@vger.kernel.org -S: Orphan -F: Documentation/arch/ia64/ -F: arch/ia64/ - IBM Operation Panel Input Driver M: Eddie James L: linux-input@vger.kernel.org @@ -16269,11 +16263,6 @@ L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/muxes/i2c-mux-pca9541.c -PCDP - PRIMARY CONSOLE AND DEBUG PORT -M: Khalid Aziz -S: Maintained -F: drivers/firmware/pcdp.* - PCI DRIVER FOR AARDVARK (Marvell Armada 3700) M: Thomas Petazzoni M: Pali Rohár diff --git a/arch/Kconfig b/arch/Kconfig index 12d51495caec..f4b210ab0612 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1088,7 +1088,6 @@ config HAVE_ARCH_COMPAT_MMAP_BASES config PAGE_SIZE_LESS_THAN_64KB def_bool y depends on !ARM64_64K_PAGES - depends on !IA64_PAGE_SIZE_64KB depends on !PAGE_SIZE_64KB depends on !PARISC_PAGE_SIZE_64KB depends on PAGE_SIZE_LESS_THAN_256KB diff --git a/arch/ia64/Kbuild b/arch/ia64/Kbuild deleted file mode 100644 index e77cc76d228c..000000000000 --- a/arch/ia64/Kbuild +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-y += kernel/ mm/ -obj-$(CONFIG_IA64_SGI_UV) += uv/ diff --git 
a/arch/ia64/Kconfig b/arch/ia64/Kconfig deleted file mode 100644 index 53faa122b0f4..000000000000 --- a/arch/ia64/Kconfig +++ /dev/null @@ -1,394 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -config PGTABLE_LEVELS - int "Page Table Levels" if !IA64_PAGE_SIZE_64KB - range 3 4 if !IA64_PAGE_SIZE_64KB - default 3 - -menu "Processor type and features" - -config IA64 - bool - select ARCH_BINFMT_ELF_EXTRA_PHDRS - select ARCH_HAS_CPU_FINALIZE_INIT - select ARCH_HAS_DMA_MARK_CLEAN - select ARCH_HAS_STRNCPY_FROM_USER - select ARCH_HAS_STRNLEN_USER - select ARCH_MIGHT_HAVE_PC_PARPORT - select ARCH_MIGHT_HAVE_PC_SERIO - select ACPI - select ACPI_NUMA if NUMA - select ARCH_ENABLE_MEMORY_HOTPLUG - select ARCH_ENABLE_MEMORY_HOTREMOVE - select ARCH_SUPPORTS_ACPI - select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI - select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI - select FORCE_PCI - select PCI_DOMAINS if PCI - select PCI_MSI - select PCI_SYSCALL if PCI - select HAS_IOPORT - select HAVE_ASM_MODVERSIONS - select HAVE_UNSTABLE_SCHED_CLOCK - select HAVE_EXIT_THREAD - select HAVE_KPROBES - select HAVE_KRETPROBES - select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_DYNAMIC_FTRACE if (!ITANIUM) - select HAVE_FUNCTION_TRACER - select HAVE_SETUP_PER_CPU_AREA - select TTY - select HAVE_ARCH_TRACEHOOK - select HAVE_FUNCTION_DESCRIPTORS - select HAVE_VIRT_CPU_ACCOUNTING - select HUGETLB_PAGE_SIZE_VARIABLE if HUGETLB_PAGE - select GENERIC_IRQ_PROBE - select GENERIC_PENDING_IRQ if SMP - select GENERIC_IRQ_SHOW - select GENERIC_IRQ_LEGACY - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select GENERIC_IOMAP - select GENERIC_IOREMAP - select GENERIC_SMP_IDLE_THREAD - select ARCH_TASK_STRUCT_ON_STACK - select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_STACK_ALLOCATOR - select ARCH_CLOCKSOURCE_DATA - select GENERIC_TIME_VSYSCALL - select LEGACY_TIMER_TICK - select SWIOTLB - select SYSCTL_ARCH_UNALIGN_NO_WARN - select HAVE_MOD_ARCH_SPECIFIC - select MODULES_USE_ELF_RELA - select ARCH_USE_CMPXCHG_LOCKREF - select 
HAVE_ARCH_AUDITSYSCALL - select NEED_DMA_MAP_STATE - select NEED_SG_DMA_LENGTH - select NUMA if !FLATMEM - select PCI_MSI_ARCH_FALLBACKS if PCI_MSI - select ZONE_DMA32 - select FUNCTION_ALIGNMENT_32B - default y - help - The Itanium Processor Family is Intel's 64-bit successor to - the 32-bit X86 line. The IA-64 Linux project has a home - page at and a mailing list at - . - -config 64BIT - bool - select ATA_NONSTANDARD if ATA - default y - -config MMU - bool - default y - -config STACKTRACE_SUPPORT - def_bool y - -config GENERIC_LOCKBREAK - def_bool n - -config GENERIC_CALIBRATE_DELAY - bool - default y - -config DMI - bool - default y - select DMI_SCAN_MACHINE_NON_EFI_FALLBACK - -config EFI - bool - select UCS2_STRING - default y - -config SCHED_OMIT_FRAME_POINTER - bool - default y - -config IA64_UNCACHED_ALLOCATOR - bool - select GENERIC_ALLOCATOR - -config ARCH_USES_PG_UNCACHED - def_bool y - depends on IA64_UNCACHED_ALLOCATOR - -config AUDIT_ARCH - bool - default y - -choice - prompt "Processor type" - default ITANIUM - -config ITANIUM - bool "Itanium" - help - Select your IA-64 processor type. The default is Itanium. - This choice is safe for all IA-64 systems, but may not perform - optimally on systems with, say, Itanium 2 or newer processors. - -config MCKINLEY - bool "Itanium 2" - help - Select this to configure for an Itanium 2 (McKinley) processor. - -endchoice - -choice - prompt "Kernel page size" - default IA64_PAGE_SIZE_16KB - -config IA64_PAGE_SIZE_4KB - bool "4KB" - help - This lets you select the page size of the kernel. For best IA-64 - performance, a page size of 8KB or 16KB is recommended. For best - IA-32 compatibility, a page size of 4KB should be selected (the vast - majority of IA-32 binaries work perfectly fine with a larger page - size). For Itanium 2 or newer systems, a page size of 64KB can also - be selected. 
- - 4KB For best IA-32 compatibility - 8KB For best IA-64 performance - 16KB For best IA-64 performance - 64KB Requires Itanium 2 or newer processor. - - If you don't know what to do, choose 16KB. - -config IA64_PAGE_SIZE_8KB - bool "8KB" - -config IA64_PAGE_SIZE_16KB - bool "16KB" - -config IA64_PAGE_SIZE_64KB - depends on !ITANIUM - bool "64KB" - -endchoice - -source "kernel/Kconfig.hz" - -config IA64_BRL_EMU - bool - depends on ITANIUM - default y - -# align cache-sensitive data to 128 bytes -config IA64_L1_CACHE_SHIFT - int - default "7" if MCKINLEY - default "6" if ITANIUM - -config IA64_SGI_UV - bool "SGI-UV support" - help - Selecting this option will add specific support for running on SGI - UV based systems. If you have an SGI UV system or are building a - distro kernel, select this option. - -config IA64_HP_SBA_IOMMU - bool "HP SBA IOMMU support" - select DMA_OPS - default y - help - Say Y here to add support for the SBA IOMMU found on HP zx1 and - sx1000 systems. If you're unsure, answer Y. - -config IA64_CYCLONE - bool "Cyclone (EXA) Time Source support" - help - Say Y here to enable support for IBM EXA Cyclone time source. - If you're unsure, answer N. - -config ARCH_FORCE_MAX_ORDER - int - default "16" if HUGETLB_PAGE - default "10" - -config SMP - bool "Symmetric multi-processing support" - help - This enables support for systems with more than one CPU. If you have - a system with only one CPU, say N. If you have a system with more - than one CPU, say Y. - - If you say N here, the kernel will run on single and multiprocessor - systems, but will use only one CPU of a multiprocessor system. If - you say Y here, the kernel will run on many, but not all, - single processor systems. On a single processor system, the kernel - will run faster if you say N here. - - See also the SMP-HOWTO available at - . - - If you don't know what to do here, say N. 
- -config NR_CPUS - int "Maximum number of CPUs (2-4096)" - range 2 4096 - depends on SMP - default "4096" - help - You should set this to the number of CPUs in your system, but - keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but - only use 2 CPUs on a >2 CPU system. Setting this to a value larger - than 64 will cause the use of a CPU mask array, causing a small - performance hit. - -config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" - depends on SMP - default n - help - Say Y here to experiment with turning CPUs off and on. CPUs - can be controlled through /sys/devices/system/cpu/cpu#. - Say N if you want to disable CPU hotplug. - -config SCHED_SMT - bool "SMT scheduler support" - depends on SMP - help - Improves the CPU scheduler's decision making when dealing with - Intel IA64 chips with MultiThreading at a cost of slightly increased - overhead in some places. If unsure say N here. - -config PERMIT_BSP_REMOVE - bool "Support removal of Bootstrap Processor" - depends on HOTPLUG_CPU - default n - help - Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU - support. - -config FORCE_CPEI_RETARGET - bool "Force assumption that CPEI can be re-targeted" - depends on PERMIT_BSP_REMOVE - default n - help - Say Y if you need to force the assumption that CPEI can be re-targeted to - any cpu in the system. This hint is available via ACPI 3.0 specifications. - Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP. - This option it useful to enable this feature on older BIOS's as well. - You can also enable this by using boot command line option force_cpei=1. 
- -config ARCH_SELECT_MEMORY_MODEL - def_bool y - -config ARCH_FLATMEM_ENABLE - def_bool y - -config ARCH_SPARSEMEM_ENABLE - def_bool y - select SPARSEMEM_VMEMMAP_ENABLE - -config ARCH_SPARSEMEM_DEFAULT - def_bool y - depends on ARCH_SPARSEMEM_ENABLE - -config NUMA - bool "NUMA support" - depends on !FLATMEM - select SMP - select USE_PERCPU_NUMA_NODE_ID - help - Say Y to compile the kernel to support NUMA (Non-Uniform Memory - Access). This option is for configuring high-end multiprocessor - server systems. If in doubt, say N. - -config NODES_SHIFT - int "Max num nodes shift(3-10)" - range 3 10 - default "10" - depends on NUMA - help - This option specifies the maximum number of nodes in your SSI system. - MAX_NUMNODES will be 2^(This value). - If in doubt, use the default. - -config HAVE_ARCH_NODEDATA_EXTENSION - def_bool y - depends on NUMA - -config HAVE_MEMORYLESS_NODES - def_bool NUMA - -config ARCH_PROC_KCORE_TEXT - def_bool y - depends on PROC_KCORE - -config IA64_MCA_RECOVERY - bool "MCA recovery from errors other than TLB." - -config IA64_PALINFO - tristate "/proc/pal support" - help - If you say Y here, you are able to get PAL (Processor Abstraction - Layer) information in /proc/pal. This contains useful information - about the processors in your systems, such as cache and TLB sizes - and the PAL firmware version in use. - - To use this option, you have to ensure that the "/proc file system - support" (CONFIG_PROC_FS) is enabled, too. - -config IA64_MC_ERR_INJECT - tristate "MC error injection support" - help - Adds support for MC error injection. If enabled, the kernel - will provide a sysfs interface for user applications to - call MC error injection PAL procedures to inject various errors. - This is a useful tool for MCA testing. - - If you're unsure, do not select this option. - -config IA64_ESI - bool "ESI (Extensible SAL Interface) support" - help - If you say Y here, support is built into the kernel to - make ESI calls. 
ESI calls are used to support vendor-specific - firmware extensions, such as the ability to inject memory-errors - for test-purposes. If you're unsure, say N. - -config IA64_HP_AML_NFW - bool "Support ACPI AML calls to native firmware" - help - This driver installs a global ACPI Operation Region handler for - region 0xA1. AML methods can use this OpRegion to call arbitrary - native firmware functions. The driver installs the OpRegion - handler if there is an HPQ5001 device or if the user supplies - the "force" module parameter, e.g., with the "aml_nfw.force" - kernel command line option. - -endmenu - -config ARCH_SUPPORTS_KEXEC - def_bool !SMP || HOTPLUG_CPU - -config ARCH_SUPPORTS_CRASH_DUMP - def_bool IA64_MCA_RECOVERY && (!SMP || HOTPLUG_CPU) - -menu "Power management and ACPI options" - -source "kernel/power/Kconfig" - -source "drivers/acpi/Kconfig" - -if PM -menu "CPU Frequency scaling" -source "drivers/cpufreq/Kconfig" -endmenu -endif - -endmenu - -config MSPEC - tristate "Memory special operations driver" - depends on IA64 - select IA64_UNCACHED_ALLOCATOR - help - If you have an ia64 and you want to enable memory special - operations support (formerly known as fetchop), say Y here, - otherwise say N. diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug deleted file mode 100644 index 2ce008e2d164..000000000000 --- a/arch/ia64/Kconfig.debug +++ /dev/null @@ -1,55 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -choice - prompt "Physical memory granularity" - default IA64_GRANULE_64MB - -config IA64_GRANULE_16MB - bool "16MB" - help - IA-64 identity-mapped regions use a large page size called "granules". - - Select "16MB" for a small granule size. - Select "64MB" for a large granule size. This is the current default. 
- -config IA64_GRANULE_64MB - bool "64MB" - depends on BROKEN - -endchoice - -config IA64_PRINT_HAZARDS - bool "Print possible IA-64 dependency violations to console" - depends on DEBUG_KERNEL - help - Selecting this option prints more information for Illegal Dependency - Faults, that is, for Read-after-Write (RAW), Write-after-Write (WAW), - or Write-after-Read (WAR) violations. This option is ignored if you - are compiling for an Itanium A step processor - (CONFIG_ITANIUM_ASTEP_SPECIFIC). If you're unsure, select Y. - -config DISABLE_VHPT - bool "Disable VHPT" - depends on DEBUG_KERNEL - help - The Virtual Hash Page Table (VHPT) enhances virtual address - translation performance. Normally you want the VHPT active but you - can select this option to disable the VHPT for debugging. If you're - unsure, answer N. - -config IA64_DEBUG_CMPXCHG - bool "Turn on compare-and-exchange bug checking (slow!)" - depends on DEBUG_KERNEL && PRINTK - help - Selecting this option turns on bug checking for the IA-64 - compare-and-exchange instructions. This is slow! Itaniums - from step B3 or later don't have this problem. If you're unsure, - select N. - -config IA64_DEBUG_IRQ - bool "Turn on irq debug checks (slow!)" - depends on DEBUG_KERNEL - help - Selecting this option turns on bug checking for the IA-64 irq_save - and restore instructions. It's useful for tracking down spinlock - problems, but slow! If you're unsure, select N. diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile deleted file mode 100644 index d553ab7022fe..000000000000 --- a/arch/ia64/Makefile +++ /dev/null @@ -1,82 +0,0 @@ -# -# ia64/Makefile -# -# This file is included by the global makefile so that you can add your own -# architecture-specific flags and dependencies. -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. 
-# -# Copyright (C) 1998-2004 by David Mosberger-Tang -# - -KBUILD_DEFCONFIG := generic_defconfig - -NM := $(CROSS_COMPILE)nm -B - -CHECKFLAGS += -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__ - -OBJCOPYFLAGS := --strip-all -LDFLAGS_vmlinux := -static -KBUILD_AFLAGS_KERNEL := -mconstant-gp -EXTRA := - -cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ - -frename-registers -fno-optimize-sibling-calls -KBUILD_CFLAGS_KERNEL := -mconstant-gp - -GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)") -KBUILD_CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)") - -ifeq ($(GAS_STATUS),buggy) -$(error Sorry, you need a newer version of the assember, one that is built from \ - a source-tree that post-dates 18-Dec-2002. You can find a pre-compiled \ - static binary of such an assembler at: \ - \ - ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) -endif - -quiet_cmd_gzip = GZIP $@ -cmd_gzip = cat $(real-prereqs) | $(KGZIP) -n -f -9 > $@ - -quiet_cmd_objcopy = OBJCOPY $@ -cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ - -KBUILD_CFLAGS += $(cflags-y) - -libs-y += arch/ia64/lib/ - -drivers-y += arch/ia64/pci/ arch/ia64/hp/common/ - -PHONY += compressed check - -all: compressed unwcheck - -compressed: vmlinux.gz - -vmlinuz: vmlinux.gz - -vmlinux.gz: vmlinux.bin FORCE - $(call if_changed,gzip) - -vmlinux.bin: vmlinux FORCE - $(call if_changed,objcopy) - -unwcheck: vmlinux - -$(Q)READELF=$(READELF) $(PYTHON3) $(srctree)/arch/ia64/scripts/unwcheck.py $< - -archheaders: - $(Q)$(MAKE) $(build)=arch/ia64/kernel/syscalls all - -CLEAN_FILES += vmlinux.gz - -install: KBUILD_IMAGE := vmlinux.gz -install: - $(call cmd,install) - -define archhelp - echo '* compressed - Build compressed kernel image' - echo ' install - Install compressed kernel image' - echo '* unwcheck - Check vmlinux for invalid unwind info' -endef diff --git a/arch/ia64/configs/bigsur_defconfig 
b/arch/ia64/configs/bigsur_defconfig deleted file mode 100644 index 7cb96db9a25d..000000000000 --- a/arch/ia64/configs/bigsur_defconfig +++ /dev/null @@ -1,102 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_LOG_BUF_SHIFT=16 -CONFIG_PROFILING=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_SGI_PARTITION=y -CONFIG_SMP=y -CONFIG_NR_CPUS=2 -CONFIG_PREEMPT=y -CONFIG_IA64_PALINFO=y -CONFIG_BINFMT_MISC=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_IPV6 is not set -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=m -CONFIG_ATA=m -CONFIG_ATA_GENERIC=m -CONFIG_ATA_PIIX=m -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_QLOGIC_1280=y -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_DM=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_MIRROR=m -CONFIG_DM_ZERO=m -CONFIG_NETDEVICES=y -CONFIG_DUMMY=y -CONFIG_INPUT_EVDEV=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_HW_RANDOM is not set -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_EFI=y -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_AGP=m -CONFIG_AGP_I460=m -CONFIG_DRM=m -CONFIG_DRM_R128=m -CONFIG_SOUND=m -CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_CS4281=m -CONFIG_USB_HIDDEV=y -CONFIG_USB=m -CONFIG_USB_MON=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_STORAGE=m -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_XFS_FS=y -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_AUTOFS_FS=m -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_UDF_FS=m -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V4=m -CONFIG_NFSD=m 
-CONFIG_NFSD_V4=y -CONFIG_CIFS=m -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_DES=y diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig deleted file mode 100644 index 4581240013dd..000000000000 --- a/arch/ia64/configs/generic_defconfig +++ /dev/null @@ -1,206 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=20 -CONFIG_CGROUPS=y -CONFIG_CPUSETS=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_ALL=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_SGI_PARTITION=y -CONFIG_MCKINLEY=y -CONFIG_IA64_PAGE_SIZE_64KB=y -CONFIG_IA64_CYCLONE=y -CONFIG_SMP=y -CONFIG_HOTPLUG_CPU=y -CONFIG_IA64_MCA_RECOVERY=y -CONFIG_IA64_PALINFO=y -CONFIG_KEXEC=y -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_DOCK=y -CONFIG_ACPI_PROCESSOR=m -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_SYN_COOKIES=y -# CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_CONNECTOR=y -# CONFIG_PNP_DEBUG_MESSAGES is not set -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_SGI_XP=m -CONFIG_ATA=y -CONFIG_ATA_GENERIC=y -CONFIG_PATA_CMD64X=y -CONFIG_ATA_PIIX=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=m -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_QLOGIC_1280=y -CONFIG_SATA_VITESSE=y -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_DM=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_MIRROR=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y 
-CONFIG_FUSION_FC=m -CONFIG_FUSION_SAS=y -CONFIG_NETDEVICES=y -CONFIG_DUMMY=m -CONFIG_NETCONSOLE=y -CONFIG_TIGON3=y -CONFIG_NET_TULIP=y -CONFIG_TULIP=m -CONFIG_E100=m -CONFIG_E1000=y -CONFIG_IGB=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_GAMEPORT=m -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=6 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_HW_RANDOM is not set -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_EFI=y -CONFIG_HPET=y -CONFIG_AGP=m -CONFIG_AGP_I460=m -CONFIG_AGP_HP_ZX1=m -CONFIG_DRM=m -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m -CONFIG_SOUND=m -CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DUMMY=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_FM801=m -CONFIG_HID_GYRATION=m -CONFIG_HID_PANTHERLORD=m -CONFIG_HID_PETALYNX=m -CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m -CONFIG_HID_SUNPLUS=m -CONFIG_USB=m -CONFIG_USB_MON=m -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_STORAGE=m -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_IPOIB=m -CONFIG_INTEL_IOMMU=y -CONFIG_MSPEC=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_REISERFS_FS=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_XFS_FS=y -CONFIG_AUTOFS_FS=m -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_UDF_FS=m -CONFIG_VFAT_FS=y -CONFIG_NTFS_FS=m -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V4=m -CONFIG_NFSD=m -CONFIG_NFSD_V4=y -CONFIG_CIFS=m 
-CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig deleted file mode 100644 index c9e806616544..000000000000 --- a/arch/ia64/configs/gensparse_defconfig +++ /dev/null @@ -1,184 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=20 -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_ALL=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_SGI_PARTITION=y -CONFIG_MCKINLEY=y -CONFIG_IA64_CYCLONE=y -CONFIG_SMP=y -CONFIG_NR_CPUS=512 -CONFIG_HOTPLUG_CPU=y -CONFIG_SPARSEMEM_MANUAL=y -CONFIG_IA64_MCA_RECOVERY=y -CONFIG_IA64_PALINFO=y -CONFIG_BINFMT_MISC=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_HOTPLUG_PCI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y 
-CONFIG_SYN_COOKIES=y -# CONFIG_IPV6 is not set -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_ATA=y -CONFIG_ATA_GENERIC=y -CONFIG_PATA_CMD64X=y -CONFIG_ATA_PIIX=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=m -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_QLOGIC_1280=y -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_DM=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_MIRROR=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y -CONFIG_FUSION_FC=m -CONFIG_NETDEVICES=y -CONFIG_DUMMY=m -CONFIG_NETCONSOLE=y -CONFIG_TIGON3=y -CONFIG_NET_TULIP=y -CONFIG_TULIP=m -CONFIG_E100=m -CONFIG_E1000=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_GAMEPORT=m -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=6 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_HW_RANDOM is not set -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_EFI=y -CONFIG_HPET=y -CONFIG_AGP=m -CONFIG_AGP_I460=m -CONFIG_AGP_HP_ZX1=m -CONFIG_DRM=m -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m -CONFIG_SOUND=m -CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_VERBOSE_PRINTK=y -CONFIG_SND_DUMMY=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -CONFIG_SND_SERIAL_U16550=m -CONFIG_SND_MPU401=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_FM801=m -CONFIG_USB=m -CONFIG_USB_MON=m -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_UHCI_HCD=m -CONFIG_USB_STORAGE=m -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_IPOIB=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y 
-CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_REISERFS_FS=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_XFS_FS=y -CONFIG_AUTOFS_FS=y -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_UDF_FS=m -CONFIG_VFAT_FS=y -CONFIG_NTFS_FS=m -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V4=m -CONFIG_NFSD=m -CONFIG_NFSD_V4=y -CONFIG_CIFS=m -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_CRYPTO_MD5=y diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig deleted file mode 100644 index d7d8fb5c7b71..000000000000 --- a/arch/ia64/configs/tiger_defconfig +++ /dev/null @@ -1,169 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=20 -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_ALL=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_BLK_DEV_BSG is not set 
-CONFIG_PARTITION_ADVANCED=y -CONFIG_SGI_PARTITION=y -CONFIG_MCKINLEY=y -CONFIG_IA64_PAGE_SIZE_64KB=y -CONFIG_IA64_CYCLONE=y -CONFIG_SMP=y -CONFIG_NR_CPUS=16 -CONFIG_HOTPLUG_CPU=y -CONFIG_PERMIT_BSP_REMOVE=y -CONFIG_FORCE_CPEI_RETARGET=y -CONFIG_IA64_MCA_RECOVERY=y -CONFIG_IA64_PALINFO=y -CONFIG_KEXEC=y -CONFIG_BINFMT_MISC=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_PROCESSOR=m -CONFIG_HOTPLUG_PCI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_SYN_COOKIES=y -# CONFIG_IPV6 is not set -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_ATA=y -CONFIG_ATA_GENERIC=y -CONFIG_PATA_CMD64X=y -CONFIG_ATA_PIIX=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=m -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_QLOGIC_1280=y -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_DM=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_MIRROR=m -CONFIG_DM_ZERO=m -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y -CONFIG_FUSION_FC=y -CONFIG_FUSION_CTL=y -CONFIG_NETDEVICES=y -CONFIG_DUMMY=m -CONFIG_NETCONSOLE=y -CONFIG_TIGON3=y -CONFIG_NET_TULIP=y -CONFIG_TULIP=m -CONFIG_E100=m -CONFIG_E1000=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_GAMEPORT=m -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=6 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_HW_RANDOM is not set -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_EFI=y -CONFIG_HPET=y -CONFIG_AGP=m -CONFIG_AGP_I460=m -CONFIG_DRM=m -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m -CONFIG_USB=y -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_UHCI_HCD=y -CONFIG_USB_STORAGE=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y 
-CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_REISERFS_FS=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_XFS_FS=y -CONFIG_AUTOFS_FS=y -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_UDF_FS=m -CONFIG_VFAT_FS=y -CONFIG_NTFS_FS=m -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V4=m -CONFIG_NFSD=m -CONFIG_NFSD_V4=y -CONFIG_CIFS=m -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_IA64_GRANULE_16MB=y -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MD5=y diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig deleted file mode 100644 index ed104550d0d5..000000000000 --- a/arch/ia64/configs/zx1_defconfig +++ /dev/null @@ -1,148 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_KPROBES=y -CONFIG_MODULES=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MCKINLEY=y -CONFIG_SMP=y -CONFIG_NR_CPUS=16 -CONFIG_HOTPLUG_CPU=y -CONFIG_FLATMEM_MANUAL=y -CONFIG_IA64_MCA_RECOVERY=y 
-CONFIG_IA64_PALINFO=y -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_ATA=y -CONFIG_ATA_GENERIC=y -CONFIG_PATA_CMD64X=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y -CONFIG_BLK_DEV_SR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_QLOGIC_1280=y -CONFIG_FUSION=y -CONFIG_FUSION_SPI=y -CONFIG_FUSION_FC=y -CONFIG_FUSION_CTL=m -CONFIG_NETDEVICES=y -CONFIG_DUMMY=y -CONFIG_TIGON3=y -CONFIG_NET_TULIP=y -CONFIG_TULIP=y -CONFIG_TULIP_MWI=y -CONFIG_TULIP_MMIO=y -CONFIG_TULIP_NAPI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_E100=y -CONFIG_E1000=y -CONFIG_INPUT_JOYDEV=y -CONFIG_INPUT_EVDEV=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO_I8042 is not set -# CONFIG_SERIO_SERPORT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=8 -CONFIG_SERIAL_8250_EXTENDED=y -CONFIG_SERIAL_8250_SHARE_IRQ=y -# CONFIG_HW_RANDOM is not set -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_EFI=y -CONFIG_I2C_CHARDEV=y -CONFIG_AGP=y -CONFIG_AGP_HP_ZX1=y -CONFIG_DRM=y -CONFIG_DRM_RADEON=y -CONFIG_FB_RADEON=y -CONFIG_FB_RADEON_DEBUG=y -CONFIG_LOGO=y -# CONFIG_LOGO_LINUX_MONO is not set -# CONFIG_LOGO_LINUX_VGA16 is not set -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_SEQUENCER=y -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_FM801=y -CONFIG_USB_HIDDEV=y -CONFIG_USB=y -CONFIG_USB_MON=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_UHCI_HCD=y -CONFIG_USB_STORAGE=y -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_UDF_FS=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y 
-CONFIG_NFS_FS=y -CONFIG_NFS_V4=y -CONFIG_NFSD=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_737=y -CONFIG_NLS_CODEPAGE_775=y -CONFIG_NLS_CODEPAGE_850=y -CONFIG_NLS_CODEPAGE_852=y -CONFIG_NLS_CODEPAGE_855=y -CONFIG_NLS_CODEPAGE_857=y -CONFIG_NLS_CODEPAGE_860=y -CONFIG_NLS_CODEPAGE_861=y -CONFIG_NLS_CODEPAGE_862=y -CONFIG_NLS_CODEPAGE_863=y -CONFIG_NLS_CODEPAGE_864=y -CONFIG_NLS_CODEPAGE_865=y -CONFIG_NLS_CODEPAGE_866=y -CONFIG_NLS_CODEPAGE_869=y -CONFIG_NLS_CODEPAGE_936=y -CONFIG_NLS_CODEPAGE_950=y -CONFIG_NLS_CODEPAGE_932=y -CONFIG_NLS_CODEPAGE_949=y -CONFIG_NLS_CODEPAGE_874=y -CONFIG_NLS_ISO8859_8=y -CONFIG_NLS_CODEPAGE_1251=y -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_ISO8859_2=y -CONFIG_NLS_ISO8859_3=y -CONFIG_NLS_ISO8859_4=y -CONFIG_NLS_ISO8859_5=y -CONFIG_NLS_ISO8859_6=y -CONFIG_NLS_ISO8859_7=y -CONFIG_NLS_ISO8859_9=y -CONFIG_NLS_ISO8859_13=y -CONFIG_NLS_ISO8859_14=y -CONFIG_NLS_ISO8859_15=y -CONFIG_NLS_KOI8_R=y -CONFIG_NLS_KOI8_U=y -CONFIG_NLS_UTF8=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_IA64_PRINT_HAZARDS=y -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_PCBC=m diff --git a/arch/ia64/hp/common/Makefile b/arch/ia64/hp/common/Makefile deleted file mode 100644 index 11a56ed38229..000000000000 --- a/arch/ia64/hp/common/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# ia64/platform/hp/common/Makefile -# -# Copyright (C) 2002 Hewlett Packard -# Copyright (C) Alex Williamson (alex_williamson@hp.com) -# - -obj-$(CONFIG_IA64_HP_SBA_IOMMU) += sba_iommu.o -obj-$(CONFIG_IA64_HP_AML_NFW) += aml_nfw.o diff --git a/arch/ia64/hp/common/aml_nfw.c b/arch/ia64/hp/common/aml_nfw.c deleted file mode 100644 index 901df49461a0..000000000000 --- a/arch/ia64/hp/common/aml_nfw.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * OpRegion handler to allow AML to call native firmware - * - * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. 
- * Bjorn Helgaas - * - * This driver implements HP Open Source Review Board proposal 1842, - * which was approved on 9/20/2006. - * - * For technical documentation, see the HP SPPA Firmware EAS, Appendix F. - * - * ACPI does not define a mechanism for AML methods to call native firmware - * interfaces such as PAL or SAL. This OpRegion handler adds such a mechanism. - * After the handler is installed, an AML method can call native firmware by - * storing the arguments and firmware entry point to specific offsets in the - * OpRegion. When AML reads the "return value" offset from the OpRegion, this - * handler loads up the arguments, makes the firmware call, and returns the - * result. - */ - -#include -#include -#include - -MODULE_AUTHOR("Bjorn Helgaas "); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("ACPI opregion handler for native firmware calls"); - -static bool force_register; -module_param_named(force, force_register, bool, 0); -MODULE_PARM_DESC(force, "Install opregion handler even without HPQ5001 device"); - -#define AML_NFW_SPACE 0xA1 - -struct ia64_pdesc { - void *ip; - void *gp; -}; - -/* - * N.B. The layout of this structure is defined in the HP SPPA FW EAS, and - * the member offsets are embedded in AML methods. 
- */ -struct ia64_nfw_context { - u64 arg[8]; - struct ia64_sal_retval ret; - u64 ip; - u64 gp; - u64 pad[2]; -}; - -static void *virt_map(u64 address) -{ - if (address & (1UL << 63)) - return (void *) (__IA64_UNCACHED_OFFSET | address); - - return __va(address); -} - -static void aml_nfw_execute(struct ia64_nfw_context *c) -{ - struct ia64_pdesc virt_entry; - ia64_sal_handler entry; - - virt_entry.ip = virt_map(c->ip); - virt_entry.gp = virt_map(c->gp); - - entry = (ia64_sal_handler) &virt_entry; - - IA64_FW_CALL(entry, c->ret, - c->arg[0], c->arg[1], c->arg[2], c->arg[3], - c->arg[4], c->arg[5], c->arg[6], c->arg[7]); -} - -static void aml_nfw_read_arg(u8 *offset, u32 bit_width, u64 *value) -{ - switch (bit_width) { - case 8: - *value = *(u8 *)offset; - break; - case 16: - *value = *(u16 *)offset; - break; - case 32: - *value = *(u32 *)offset; - break; - case 64: - *value = *(u64 *)offset; - break; - } -} - -static void aml_nfw_write_arg(u8 *offset, u32 bit_width, u64 *value) -{ - switch (bit_width) { - case 8: - *(u8 *) offset = *value; - break; - case 16: - *(u16 *) offset = *value; - break; - case 32: - *(u32 *) offset = *value; - break; - case 64: - *(u64 *) offset = *value; - break; - } -} - -static acpi_status aml_nfw_handler(u32 function, acpi_physical_address address, - u32 bit_width, u64 *value, void *handler_context, - void *region_context) -{ - struct ia64_nfw_context *context = handler_context; - u8 *offset = (u8 *) context + address; - - if (bit_width != 8 && bit_width != 16 && - bit_width != 32 && bit_width != 64) - return AE_BAD_PARAMETER; - - if (address + (bit_width >> 3) > sizeof(struct ia64_nfw_context)) - return AE_BAD_PARAMETER; - - switch (function) { - case ACPI_READ: - if (address == offsetof(struct ia64_nfw_context, ret)) - aml_nfw_execute(context); - aml_nfw_read_arg(offset, bit_width, value); - break; - case ACPI_WRITE: - aml_nfw_write_arg(offset, bit_width, value); - break; - } - - return AE_OK; -} - -static struct ia64_nfw_context 
global_context; -static int global_handler_registered; - -static int aml_nfw_add_global_handler(void) -{ - acpi_status status; - - if (global_handler_registered) - return 0; - - status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT, - AML_NFW_SPACE, aml_nfw_handler, NULL, &global_context); - if (ACPI_FAILURE(status)) - return -ENODEV; - - global_handler_registered = 1; - printk(KERN_INFO "Global 0x%02X opregion handler registered\n", - AML_NFW_SPACE); - return 0; -} - -static int aml_nfw_remove_global_handler(void) -{ - acpi_status status; - - if (!global_handler_registered) - return 0; - - status = acpi_remove_address_space_handler(ACPI_ROOT_OBJECT, - AML_NFW_SPACE, aml_nfw_handler); - if (ACPI_FAILURE(status)) - return -ENODEV; - - global_handler_registered = 0; - printk(KERN_INFO "Global 0x%02X opregion handler removed\n", - AML_NFW_SPACE); - return 0; -} - -static int aml_nfw_add(struct acpi_device *device) -{ - /* - * We would normally allocate a new context structure and install - * the address space handler for the specific device we found. - * But the HP-UX implementation shares a single global context - * and always puts the handler at the root, so we'll do the same. 
- */ - return aml_nfw_add_global_handler(); -} - -static void aml_nfw_remove(struct acpi_device *device) -{ - aml_nfw_remove_global_handler(); -} - -static const struct acpi_device_id aml_nfw_ids[] = { - {"HPQ5001", 0}, - {"", 0} -}; - -static struct acpi_driver acpi_aml_nfw_driver = { - .name = "native firmware", - .ids = aml_nfw_ids, - .ops = { - .add = aml_nfw_add, - .remove = aml_nfw_remove, - }, -}; - -static int __init aml_nfw_init(void) -{ - int result; - - if (force_register) - aml_nfw_add_global_handler(); - - result = acpi_bus_register_driver(&acpi_aml_nfw_driver); - if (result < 0) { - aml_nfw_remove_global_handler(); - return result; - } - - return 0; -} - -static void __exit aml_nfw_exit(void) -{ - acpi_bus_unregister_driver(&acpi_aml_nfw_driver); - aml_nfw_remove_global_handler(); -} - -module_init(aml_nfw_init); -module_exit(aml_nfw_exit); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c deleted file mode 100644 index c4d477e8bcd4..000000000000 --- a/arch/ia64/hp/common/sba_iommu.c +++ /dev/null @@ -1,2155 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* -** IA64 System Bus Adapter (SBA) I/O MMU manager -** -** (c) Copyright 2002-2005 Alex Williamson -** (c) Copyright 2002-2003 Grant Grundler -** (c) Copyright 2002-2005 Hewlett-Packard Company -** -** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code) -** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code) -** -** -** -** This module initializes the IOC (I/O Controller) found on HP -** McKinley machines and their successors. -** -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* hweight64() */ -#include -#include -#include -#include -#include - -#include /* ia64_get_itc() */ -#include -#include /* PAGE_OFFSET */ -#include - -#include - -#define PFX "IOC: " - -/* -** Enabling timing search of the pdir resource map. Output in /proc. 
-** Disabled by default to optimize performance. -*/ -#undef PDIR_SEARCH_TIMING - -/* -** This option allows cards capable of 64bit DMA to bypass the IOMMU. If -** not defined, all DMA will be 32bit and go through the TLB. -** There's potentially a conflict in the bio merge code with us -** advertising an iommu, but then bypassing it. Since I/O MMU bypassing -** appears to give more performance than bio-level virtual merging, we'll -** do the former for now. NOTE: BYPASS_SG also needs to be undef'd to -** completely restrict DMA to the IOMMU. -*/ -#define ALLOW_IOV_BYPASS - -/* -** This option specifically allows/disallows bypassing scatterlists with -** multiple entries. Coalescing these entries can allow better DMA streaming -** and in some cases shows better performance than entirely bypassing the -** IOMMU. Performance increase on the order of 1-2% sequential output/input -** using bonnie++ on a RAID0 MD device (sym2 & mpt). -*/ -#undef ALLOW_IOV_BYPASS_SG - -/* -** If a device prefetches beyond the end of a valid pdir entry, it will cause -** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should -** disconnect on 4k boundaries and prevent such issues. If the device is -** particularly aggressive, this option will keep the entire pdir valid such -** that prefetching will hit a valid address. This could severely impact -** error containment, and is therefore off by default. The page that is -** used for spill-over is poisoned, so that should help debugging somewhat. -*/ -#undef FULL_VALID_PDIR - -#define ENABLE_MARK_CLEAN - -/* -** The number of debug flags is a clue - this code is fragile. NOTE: since -** tightening the use of res_lock the resource bitmap and actual pdir are no -** longer guaranteed to stay in sync. The sanity checking code isn't going to -** like that. 
-*/ -#undef DEBUG_SBA_INIT -#undef DEBUG_SBA_RUN -#undef DEBUG_SBA_RUN_SG -#undef DEBUG_SBA_RESOURCE -#undef ASSERT_PDIR_SANITY -#undef DEBUG_LARGE_SG_ENTRIES -#undef DEBUG_BYPASS - -#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY) -#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive -#endif - -#define SBA_INLINE __inline__ -/* #define SBA_INLINE */ - -#ifdef DEBUG_SBA_INIT -#define DBG_INIT(x...) printk(x) -#else -#define DBG_INIT(x...) -#endif - -#ifdef DEBUG_SBA_RUN -#define DBG_RUN(x...) printk(x) -#else -#define DBG_RUN(x...) -#endif - -#ifdef DEBUG_SBA_RUN_SG -#define DBG_RUN_SG(x...) printk(x) -#else -#define DBG_RUN_SG(x...) -#endif - - -#ifdef DEBUG_SBA_RESOURCE -#define DBG_RES(x...) printk(x) -#else -#define DBG_RES(x...) -#endif - -#ifdef DEBUG_BYPASS -#define DBG_BYPASS(x...) printk(x) -#else -#define DBG_BYPASS(x...) -#endif - -#ifdef ASSERT_PDIR_SANITY -#define ASSERT(expr) \ - if(!(expr)) { \ - printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \ - panic(#expr); \ - } -#else -#define ASSERT(expr) -#endif - -/* -** The number of pdir entries to "free" before issuing -** a read to PCOM register to flush out PCOM writes. -** Interacts with allocation granularity (ie 4 or 8 entries -** allocated and free'd/purged at a time might make this -** less interesting). -*/ -#define DELAYED_RESOURCE_CNT 64 - -#define PCI_DEVICE_ID_HP_SX2000_IOC 0x12ec - -#define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP) -#define ZX2_IOC_ID ((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP) -#define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP) -#define SX1000_IOC_ID ((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP) -#define SX2000_IOC_ID ((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP) - -#define ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */ - -#define IOC_FUNC_ID 0x000 -#define IOC_FCLASS 0x008 /* function class, bist, header, rev... 
*/ -#define IOC_IBASE 0x300 /* IO TLB */ -#define IOC_IMASK 0x308 -#define IOC_PCOM 0x310 -#define IOC_TCNFG 0x318 -#define IOC_PDIR_BASE 0x320 - -#define IOC_ROPE0_CFG 0x500 -#define IOC_ROPE_AO 0x10 /* Allow "Relaxed Ordering" */ - - -/* AGP GART driver looks for this */ -#define ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL - -/* -** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register) -** -** Some IOCs (sx1000) can run at the above pages sizes, but are -** really only supported using the IOC at a 4k page size. -** -** iovp_size could only be greater than PAGE_SIZE if we are -** confident the drivers really only touch the next physical -** page iff that driver instance owns it. -*/ -static unsigned long iovp_size; -static unsigned long iovp_shift; -static unsigned long iovp_mask; - -struct ioc { - void __iomem *ioc_hpa; /* I/O MMU base address */ - char *res_map; /* resource map, bit == pdir entry */ - u64 *pdir_base; /* physical base address */ - unsigned long ibase; /* pdir IOV Space base */ - unsigned long imask; /* pdir IOV Space mask */ - - unsigned long *res_hint; /* next avail IOVP - circular search */ - unsigned long dma_mask; - spinlock_t res_lock; /* protects the resource bitmap, but must be held when */ - /* clearing pdir to prevent races with allocations. */ - unsigned int res_bitshift; /* from the RIGHT! */ - unsigned int res_size; /* size of resource map in bytes */ -#ifdef CONFIG_NUMA - unsigned int node; /* node where this IOC lives */ -#endif -#if DELAYED_RESOURCE_CNT > 0 - spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */ - /* than res_lock for bigger systems. 
*/ - int saved_cnt; - struct sba_dma_pair { - dma_addr_t iova; - size_t size; - } saved[DELAYED_RESOURCE_CNT]; -#endif - -#ifdef PDIR_SEARCH_TIMING -#define SBA_SEARCH_SAMPLE 0x100 - unsigned long avg_search[SBA_SEARCH_SAMPLE]; - unsigned long avg_idx; /* current index into avg_search */ -#endif - - /* Stuff we don't need in performance path */ - struct ioc *next; /* list of IOC's in system */ - acpi_handle handle; /* for multiple IOC's */ - const char *name; - unsigned int func_id; - unsigned int rev; /* HW revision of chip */ - u32 iov_size; - unsigned int pdir_size; /* in bytes, determined by IOV Space size */ - struct pci_dev *sac_only_dev; -}; - -static struct ioc *ioc_list, *ioc_found; -static int reserve_sba_gart = 1; - -static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t); -static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t); - -#define sba_sg_address(sg) sg_virt((sg)) - -#ifdef FULL_VALID_PDIR -static u64 prefetch_spill_page; -#endif - -#define GET_IOC(dev) ((dev_is_pci(dev)) \ - ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL) - -/* -** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up -** (or rather not merge) DMAs into manageable chunks. -** On parisc, this is more of the software/tuning constraint -** rather than the HW. I/O MMU allocation algorithms can be -** faster with smaller sizes (to some degree). -*/ -#define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size) - -#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1)) - -/************************************ -** SBA register read and write support -** -** BE WARNED: register writes are posted. -** (ie follow writes which must reach HW with a read) -** -*/ -#define READ_REG(addr) __raw_readq(addr) -#define WRITE_REG(val, addr) __raw_writeq(val, addr) - -#ifdef DEBUG_SBA_INIT - -/** - * sba_dump_tlb - debugging only - print IOMMU operating parameters - * @hpa: base address of the IOMMU - * - * Print the size/location of the IO MMU PDIR. 
- */ -static void -sba_dump_tlb(char *hpa) -{ - DBG_INIT("IO TLB at 0x%p\n", (void *)hpa); - DBG_INIT("IOC_IBASE : %016lx\n", READ_REG(hpa+IOC_IBASE)); - DBG_INIT("IOC_IMASK : %016lx\n", READ_REG(hpa+IOC_IMASK)); - DBG_INIT("IOC_TCNFG : %016lx\n", READ_REG(hpa+IOC_TCNFG)); - DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE)); - DBG_INIT("\n"); -} -#endif - - -#ifdef ASSERT_PDIR_SANITY - -/** - * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @msg: text to print ont the output line. - * @pide: pdir index. - * - * Print one entry of the IO MMU PDIR in human readable form. - */ -static void -sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide) -{ - /* start printing from lowest pde in rval */ - u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)]; - unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)]; - uint rcnt; - - printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", - msg, rptr, pide & (BITS_PER_LONG - 1), *rptr); - - rcnt = 0; - while (rcnt < BITS_PER_LONG) { - printk(KERN_DEBUG "%s %2d %p %016Lx\n", - (rcnt == (pide & (BITS_PER_LONG - 1))) - ? " -->" : " ", - rcnt, ptr, (unsigned long long) *ptr ); - rcnt++; - ptr++; - } - printk(KERN_DEBUG "%s", msg); -} - - -/** - * sba_check_pdir - debugging only - consistency checker - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @msg: text to print ont the output line. 
- * - * Verify the resource map and pdir state is consistent - */ -static int -sba_check_pdir(struct ioc *ioc, char *msg) -{ - u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]); - u64 *rptr = (u64 *) ioc->res_map; /* resource map ptr */ - u64 *pptr = ioc->pdir_base; /* pdir ptr */ - uint pide = 0; - - while (rptr < rptr_end) { - u64 rval; - int rcnt; /* number of bits we might check */ - - rval = *rptr; - rcnt = 64; - - while (rcnt) { - /* Get last byte and highest bit from that */ - u32 pde = ((u32)((*pptr >> (63)) & 0x1)); - if ((rval & 0x1) ^ pde) - { - /* - ** BUMMER! -- res_map != pdir -- - ** Dump rval and matching pdir entries - */ - sba_dump_pdir_entry(ioc, msg, pide); - return(1); - } - rcnt--; - rval >>= 1; /* try the next bit */ - pptr++; - pide++; - } - rptr++; /* look at next word of res_map */ - } - /* It'd be nice if we always got here :^) */ - return 0; -} - - -/** - * sba_dump_sg - debugging only - print Scatter-Gather list - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @startsg: head of the SG list - * @nents: number of entries in SG list - * - * print the SG list so we can verify it's correct by hand. - */ -static void -sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) -{ - while (nents-- > 0) { - printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents, - startsg->dma_address, startsg->dma_length, - sba_sg_address(startsg)); - startsg = sg_next(startsg); - } -} - -static void -sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) -{ - struct scatterlist *the_sg = startsg; - int the_nents = nents; - - while (the_nents-- > 0) { - if (sba_sg_address(the_sg) == 0x0UL) - sba_dump_sg(NULL, startsg, nents); - the_sg = sg_next(the_sg); - } -} - -#endif /* ASSERT_PDIR_SANITY */ - - - - -/************************************************************** -* -* I/O Pdir Resource Management -* -* Bits set in the resource map are in use. -* Each bit can represent a number of pages. 
-* LSbs represent lower addresses (IOVA's). -* -***************************************************************/ -#define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */ - -/* Convert from IOVP to IOVA and vice versa. */ -#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset)) -#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase)) - -#define PDIR_ENTRY_SIZE sizeof(u64) - -#define PDIR_INDEX(iovp) ((iovp)>>iovp_shift) - -#define RESMAP_MASK(n) ~(~0UL << (n)) -#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1) - - -/** - * For most cases the normal get_order is sufficient, however it limits us - * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity. - * It only incurs about 1 clock cycle to use this one with the static variable - * and makes the code more intuitive. - */ -static SBA_INLINE int -get_iovp_order (unsigned long size) -{ - long double d = size - 1; - long order; - - order = ia64_getf_exp(d); - order = order - iovp_shift - 0xffff + 1; - if (order < 0) - order = 0; - return order; -} - -static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr, - unsigned int bitshiftcnt) -{ - return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3) - + bitshiftcnt; -} - -/** - * sba_search_bitmap - find free space in IO PDIR resource bitmap - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @bits_wanted: number of entries we need. - * @use_hint: use res_hint to indicate where to start looking - * - * Find consecutive free bits in resource bitmap. - * Each bit represents one entry in the IO Pdir. - * Cool perf optimization: search for log2(size) bits at a time. 
- */ -static SBA_INLINE unsigned long -sba_search_bitmap(struct ioc *ioc, struct device *dev, - unsigned long bits_wanted, int use_hint) -{ - unsigned long *res_ptr; - unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]); - unsigned long flags, pide = ~0UL, tpide; - unsigned long boundary_size; - unsigned long shift; - int ret; - - ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); - ASSERT(res_ptr < res_end); - - boundary_size = dma_get_seg_boundary_nr_pages(dev, iovp_shift); - - BUG_ON(ioc->ibase & ~iovp_mask); - shift = ioc->ibase >> iovp_shift; - - spin_lock_irqsave(&ioc->res_lock, flags); - - /* Allow caller to force a search through the entire resource space */ - if (likely(use_hint)) { - res_ptr = ioc->res_hint; - } else { - res_ptr = (ulong *)ioc->res_map; - ioc->res_bitshift = 0; - } - - /* - * N.B. REO/Grande defect AR2305 can cause TLB fetch timeouts - * if a TLB entry is purged while in use. sba_mark_invalid() - * purges IOTLB entries in power-of-two sizes, so we also - * allocate IOVA space in power-of-two sizes. - */ - bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift); - - if (likely(bits_wanted == 1)) { - unsigned int bitshiftcnt; - for(; res_ptr < res_end ; res_ptr++) { - if (likely(*res_ptr != ~0UL)) { - bitshiftcnt = ffz(*res_ptr); - *res_ptr |= (1UL << bitshiftcnt); - pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt); - ioc->res_bitshift = bitshiftcnt + bits_wanted; - goto found_it; - } - } - goto not_found; - - } - - if (likely(bits_wanted <= BITS_PER_LONG/2)) { - /* - ** Search the resource bit map on well-aligned values. - ** "o" is the alignment. - ** We need the alignment to invalidate I/O TLB using - ** SBA HW features in the unmap path. 
- */ - unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift); - uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o); - unsigned long mask, base_mask; - - base_mask = RESMAP_MASK(bits_wanted); - mask = base_mask << bitshiftcnt; - - DBG_RES("%s() o %ld %p", __func__, o, res_ptr); - for(; res_ptr < res_end ; res_ptr++) - { - DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr); - ASSERT(0 != mask); - for (; mask ; mask <<= o, bitshiftcnt += o) { - tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt); - ret = iommu_is_span_boundary(tpide, bits_wanted, - shift, - boundary_size); - if ((0 == ((*res_ptr) & mask)) && !ret) { - *res_ptr |= mask; /* mark resources busy! */ - pide = tpide; - ioc->res_bitshift = bitshiftcnt + bits_wanted; - goto found_it; - } - } - - bitshiftcnt = 0; - mask = base_mask; - - } - - } else { - int qwords, bits, i; - unsigned long *end; - - qwords = bits_wanted >> 6; /* /64 */ - bits = bits_wanted - (qwords * BITS_PER_LONG); - - end = res_end - qwords; - - for (; res_ptr < end; res_ptr++) { - tpide = ptr_to_pide(ioc, res_ptr, 0); - ret = iommu_is_span_boundary(tpide, bits_wanted, - shift, boundary_size); - if (ret) - goto next_ptr; - for (i = 0 ; i < qwords ; i++) { - if (res_ptr[i] != 0) - goto next_ptr; - } - if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits)) - continue; - - /* Found it, mark it */ - for (i = 0 ; i < qwords ; i++) - res_ptr[i] = ~0UL; - res_ptr[i] |= RESMAP_MASK(bits); - - pide = tpide; - res_ptr += qwords; - ioc->res_bitshift = bits; - goto found_it; -next_ptr: - ; - } - } - -not_found: - prefetch(ioc->res_map); - ioc->res_hint = (unsigned long *) ioc->res_map; - ioc->res_bitshift = 0; - spin_unlock_irqrestore(&ioc->res_lock, flags); - return (pide); - -found_it: - ioc->res_hint = res_ptr; - spin_unlock_irqrestore(&ioc->res_lock, flags); - return (pide); -} - - -/** - * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap - * @ioc: IO MMU structure which owns the pdir we are interested in. 
- * @size: number of bytes to create a mapping for - * - * Given a size, find consecutive unmarked and then mark those bits in the - * resource bit map. - */ -static int -sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size) -{ - unsigned int pages_needed = size >> iovp_shift; -#ifdef PDIR_SEARCH_TIMING - unsigned long itc_start; -#endif - unsigned long pide; - - ASSERT(pages_needed); - ASSERT(0 == (size & ~iovp_mask)); - -#ifdef PDIR_SEARCH_TIMING - itc_start = ia64_get_itc(); -#endif - /* - ** "seek and ye shall find"...praying never hurts either... - */ - pide = sba_search_bitmap(ioc, dev, pages_needed, 1); - if (unlikely(pide >= (ioc->res_size << 3))) { - pide = sba_search_bitmap(ioc, dev, pages_needed, 0); - if (unlikely(pide >= (ioc->res_size << 3))) { -#if DELAYED_RESOURCE_CNT > 0 - unsigned long flags; - - /* - ** With delayed resource freeing, we can give this one more shot. We're - ** getting close to being in trouble here, so do what we can to make this - ** one count. 
- */ - spin_lock_irqsave(&ioc->saved_lock, flags); - if (ioc->saved_cnt > 0) { - struct sba_dma_pair *d; - int cnt = ioc->saved_cnt; - - d = &(ioc->saved[ioc->saved_cnt - 1]); - - spin_lock(&ioc->res_lock); - while (cnt--) { - sba_mark_invalid(ioc, d->iova, d->size); - sba_free_range(ioc, d->iova, d->size); - d--; - } - ioc->saved_cnt = 0; - READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ - spin_unlock(&ioc->res_lock); - } - spin_unlock_irqrestore(&ioc->saved_lock, flags); - - pide = sba_search_bitmap(ioc, dev, pages_needed, 0); - if (unlikely(pide >= (ioc->res_size << 3))) { - printk(KERN_WARNING "%s: I/O MMU @ %p is" - "out of mapping resources, %u %u %lx\n", - __func__, ioc->ioc_hpa, ioc->res_size, - pages_needed, dma_get_seg_boundary(dev)); - return -1; - } -#else - printk(KERN_WARNING "%s: I/O MMU @ %p is" - "out of mapping resources, %u %u %lx\n", - __func__, ioc->ioc_hpa, ioc->res_size, - pages_needed, dma_get_seg_boundary(dev)); - return -1; -#endif - } - } - -#ifdef PDIR_SEARCH_TIMING - ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed; - ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1; -#endif - - prefetchw(&(ioc->pdir_base[pide])); - -#ifdef ASSERT_PDIR_SANITY - /* verify the first enable bit is clear */ - if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) { - sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide); - } -#endif - - DBG_RES("%s(%x) %d -> %lx hint %x/%x\n", - __func__, size, pages_needed, pide, - (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map), - ioc->res_bitshift ); - - return (pide); -} - - -/** - * sba_free_range - unmark bits in IO PDIR resource bitmap - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @iova: IO virtual address which was previously allocated. 
- * @size: number of bytes to create a mapping for - * - * clear bits in the ioc's resource map - */ -static SBA_INLINE void -sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size) -{ - unsigned long iovp = SBA_IOVP(ioc, iova); - unsigned int pide = PDIR_INDEX(iovp); - unsigned int ridx = pide >> 3; /* convert bit to byte address */ - unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]); - int bits_not_wanted = size >> iovp_shift; - unsigned long m; - - /* Round up to power-of-two size: see AR2305 note above */ - bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift); - for (; bits_not_wanted > 0 ; res_ptr++) { - - if (unlikely(bits_not_wanted > BITS_PER_LONG)) { - - /* these mappings start 64bit aligned */ - *res_ptr = 0UL; - bits_not_wanted -= BITS_PER_LONG; - pide += BITS_PER_LONG; - - } else { - - /* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */ - m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1)); - bits_not_wanted = 0; - - DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __func__, (uint) iova, size, - bits_not_wanted, m, pide, res_ptr, *res_ptr); - - ASSERT(m != 0); - ASSERT(bits_not_wanted); - ASSERT((*res_ptr & m) == m); /* verify same bits are set */ - *res_ptr &= ~m; - } - } -} - - -/************************************************************** -* -* "Dynamic DMA Mapping" support (aka "Coherent I/O") -* -***************************************************************/ - -/** - * sba_io_pdir_entry - fill in one IO PDIR entry - * @pdir_ptr: pointer to IO PDIR entry - * @vba: Virtual CPU address of buffer to map - * - * SBA Mapping Routine - * - * Given a virtual address (vba, arg1) sba_io_pdir_entry() - * loads the I/O PDIR entry pointed to by pdir_ptr (arg0). 
- * Each IO Pdir entry consists of 8 bytes as shown below - * (LSB == bit 0): - * - * 63 40 11 7 0 - * +-+---------------------+----------------------------------+----+--------+ - * |V| U | PPN[39:12] | U | FF | - * +-+---------------------+----------------------------------+----+--------+ - * - * V == Valid Bit - * U == Unused - * PPN == Physical Page Number - * - * The physical address fields are filled with the results of virt_to_phys() - * on the vba. - */ - -#if 1 -#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL) \ - | 0x8000000000000000ULL) -#else -void SBA_INLINE -sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba) -{ - *pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL); -} -#endif - -#ifdef ENABLE_MARK_CLEAN -/* - * Since DMA is i-cache coherent, any (complete) pages that were written via - * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to - * flush them when they get mapped into an executable vm-area. - */ -static void mark_clean(void *addr, size_t size) -{ - struct folio *folio = virt_to_folio(addr); - ssize_t left = size; - size_t offset = offset_in_folio(folio, addr); - - if (offset) { - left -= folio_size(folio) - offset; - if (left <= 0) - return; - folio = folio_next(folio); - } - - while (left >= folio_size(folio)) { - left -= folio_size(folio); - set_bit(PG_arch_1, &folio->flags); - if (!left) - break; - folio = folio_next(folio); - } -} -#endif - -/** - * sba_mark_invalid - invalidate one or more IO PDIR entries - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @iova: IO Virtual Address mapped earlier - * @byte_cnt: number of bytes this mapping covers. - * - * Marking the IO PDIR entry(ies) as Invalid and invalidate - * corresponding IO TLB entry. The PCOM (Purge Command Register) - * is to purge stale entries in the IO TLB when unmapping entries. 
- * - * The PCOM register supports purging of multiple pages, with a minium - * of 1 page and a maximum of 2GB. Hardware requires the address be - * aligned to the size of the range being purged. The size of the range - * must be a power of 2. The "Cool perf optimization" in the - * allocation routine helps keep that true. - */ -static SBA_INLINE void -sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) -{ - u32 iovp = (u32) SBA_IOVP(ioc,iova); - - int off = PDIR_INDEX(iovp); - - /* Must be non-zero and rounded up */ - ASSERT(byte_cnt > 0); - ASSERT(0 == (byte_cnt & ~iovp_mask)); - -#ifdef ASSERT_PDIR_SANITY - /* Assert first pdir entry is set */ - if (!(ioc->pdir_base[off] >> 60)) { - sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp)); - } -#endif - - if (byte_cnt <= iovp_size) - { - ASSERT(off < ioc->pdir_size); - - iovp |= iovp_shift; /* set "size" field for PCOM */ - -#ifndef FULL_VALID_PDIR - /* - ** clear I/O PDIR entry "valid" bit - ** Do NOT clear the rest - save it for debugging. - ** We should only clear bits that have previously - ** been enabled. - */ - ioc->pdir_base[off] &= ~(0x80000000000000FFULL); -#else - /* - ** If we want to maintain the PDIR as valid, put in - ** the spill page so devices prefetching won't - ** cause a hard fail. - */ - ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); -#endif - } else { - u32 t = get_iovp_order(byte_cnt) + iovp_shift; - - iovp |= t; - ASSERT(t <= 31); /* 2GB! 
Max value of "size" field */ - - do { - /* verify this pdir entry is enabled */ - ASSERT(ioc->pdir_base[off] >> 63); -#ifndef FULL_VALID_PDIR - /* clear I/O Pdir entry "valid" bit first */ - ioc->pdir_base[off] &= ~(0x80000000000000FFULL); -#else - ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); -#endif - off++; - byte_cnt -= iovp_size; - } while (byte_cnt > 0); - } - - WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM); -} - -/** - * sba_map_page - map one buffer and return IOVA for DMA - * @dev: instance of PCI owned by the driver that's asking. - * @page: page to map - * @poff: offset into page - * @size: number of bytes to map - * @dir: dma direction - * @attrs: optional dma attributes - * - * See Documentation/core-api/dma-api-howto.rst - */ -static dma_addr_t sba_map_page(struct device *dev, struct page *page, - unsigned long poff, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - struct ioc *ioc; - void *addr = page_address(page) + poff; - dma_addr_t iovp; - dma_addr_t offset; - u64 *pdir_start; - int pide; -#ifdef ASSERT_PDIR_SANITY - unsigned long flags; -#endif -#ifdef ALLOW_IOV_BYPASS - unsigned long pci_addr = virt_to_phys(addr); -#endif - -#ifdef ALLOW_IOV_BYPASS - ASSERT(to_pci_dev(dev)->dma_mask); - /* - ** Check if the PCI device can DMA to ptr... if so, just return ptr - */ - if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) { - /* - ** Device is bit capable of DMA'ing to the buffer... 
- ** just return the PCI address of ptr - */ - DBG_BYPASS("sba_map_page() bypass mask/addr: " - "0x%lx/0x%lx\n", - to_pci_dev(dev)->dma_mask, pci_addr); - return pci_addr; - } -#endif - ioc = GET_IOC(dev); - ASSERT(ioc); - - prefetch(ioc->res_hint); - - ASSERT(size > 0); - ASSERT(size <= DMA_CHUNK_SIZE); - - /* save offset bits */ - offset = ((dma_addr_t) (long) addr) & ~iovp_mask; - - /* round up to nearest iovp_size */ - size = (size + offset + ~iovp_mask) & iovp_mask; - -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check before sba_map_page()")) - panic("Sanity check failed"); - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - - pide = sba_alloc_range(ioc, dev, size); - if (pide < 0) - return DMA_MAPPING_ERROR; - - iovp = (dma_addr_t) pide << iovp_shift; - - DBG_RUN("%s() 0x%p -> 0x%lx\n", __func__, addr, (long) iovp | offset); - - pdir_start = &(ioc->pdir_base[pide]); - - while (size > 0) { - ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */ - sba_io_pdir_entry(pdir_start, (unsigned long) addr); - - DBG_RUN(" pdir 0x%p %lx\n", pdir_start, *pdir_start); - - addr += iovp_size; - size -= iovp_size; - pdir_start++; - } - /* force pdir update */ - wmb(); - - /* form complete address */ -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check after sba_map_page()"); - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - return SBA_IOVA(ioc, iovp, offset); -} - -#ifdef ENABLE_MARK_CLEAN -static SBA_INLINE void -sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) -{ - u32 iovp = (u32) SBA_IOVP(ioc,iova); - int off = PDIR_INDEX(iovp); - void *addr; - - if (size <= iovp_size) { - addr = phys_to_virt(ioc->pdir_base[off] & - ~0xE000000000000FFFULL); - mark_clean(addr, size); - } else { - do { - addr = phys_to_virt(ioc->pdir_base[off] & - ~0xE000000000000FFFULL); - mark_clean(addr, min(size, iovp_size)); - off++; - size -= iovp_size; - } while (size 
> 0); - } -} -#endif - -/** - * sba_unmap_page - unmap one IOVA and free resources - * @dev: instance of PCI owned by the driver that's asking. - * @iova: IOVA of driver buffer previously mapped. - * @size: number of bytes mapped in driver buffer. - * @dir: R/W or both. - * @attrs: optional dma attributes - * - * See Documentation/core-api/dma-api-howto.rst - */ -static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - struct ioc *ioc; -#if DELAYED_RESOURCE_CNT > 0 - struct sba_dma_pair *d; -#endif - unsigned long flags; - dma_addr_t offset; - - ioc = GET_IOC(dev); - ASSERT(ioc); - -#ifdef ALLOW_IOV_BYPASS - if (likely((iova & ioc->imask) != ioc->ibase)) { - /* - ** Address does not fall w/in IOVA, must be bypassing - */ - DBG_BYPASS("sba_unmap_page() bypass addr: 0x%lx\n", - iova); - -#ifdef ENABLE_MARK_CLEAN - if (dir == DMA_FROM_DEVICE) { - mark_clean(phys_to_virt(iova), size); - } -#endif - return; - } -#endif - offset = iova & ~iovp_mask; - - DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long) iova, size); - - iova ^= offset; /* clear offset bits */ - size += offset; - size = ROUNDUP(size, iovp_size); - -#ifdef ENABLE_MARK_CLEAN - if (dir == DMA_FROM_DEVICE) - sba_mark_clean(ioc, iova, size); -#endif - -#if DELAYED_RESOURCE_CNT > 0 - spin_lock_irqsave(&ioc->saved_lock, flags); - d = &(ioc->saved[ioc->saved_cnt]); - d->iova = iova; - d->size = size; - if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) { - int cnt = ioc->saved_cnt; - spin_lock(&ioc->res_lock); - while (cnt--) { - sba_mark_invalid(ioc, d->iova, d->size); - sba_free_range(ioc, d->iova, d->size); - d--; - } - ioc->saved_cnt = 0; - READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ - spin_unlock(&ioc->res_lock); - } - spin_unlock_irqrestore(&ioc->saved_lock, flags); -#else /* DELAYED_RESOURCE_CNT == 0 */ - spin_lock_irqsave(&ioc->res_lock, flags); - sba_mark_invalid(ioc, iova, size); - sba_free_range(ioc, iova, size); 
- READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif /* DELAYED_RESOURCE_CNT == 0 */ -} - -/** - * sba_alloc_coherent - allocate/map shared mem for DMA - * @dev: instance of PCI owned by the driver that's asking. - * @size: number of bytes mapped in driver buffer. - * @dma_handle: IOVA of new buffer. - * - * See Documentation/core-api/dma-api-howto.rst - */ -static void * -sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flags, unsigned long attrs) -{ - struct page *page; - struct ioc *ioc; - int node = -1; - void *addr; - - ioc = GET_IOC(dev); - ASSERT(ioc); -#ifdef CONFIG_NUMA - node = ioc->node; -#endif - - page = alloc_pages_node(node, flags, get_order(size)); - if (unlikely(!page)) - return NULL; - - addr = page_address(page); - memset(addr, 0, size); - *dma_handle = page_to_phys(page); - -#ifdef ALLOW_IOV_BYPASS - ASSERT(dev->coherent_dma_mask); - /* - ** Check if the PCI device can DMA to ptr... if so, just return ptr - */ - if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) { - DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n", - dev->coherent_dma_mask, *dma_handle); - - return addr; - } -#endif - - /* - * If device can't bypass or bypass is disabled, pass the 32bit fake - * device to map single to get an iova mapping. - */ - *dma_handle = sba_map_page(&ioc->sac_only_dev->dev, page, 0, size, - DMA_BIDIRECTIONAL, 0); - if (dma_mapping_error(dev, *dma_handle)) - return NULL; - return addr; -} - - -/** - * sba_free_coherent - free/unmap shared mem for DMA - * @dev: instance of PCI owned by the driver that's asking. - * @size: number of bytes mapped in driver buffer. - * @vaddr: virtual address IOVA of "consistent" buffer. - * @dma_handler: IO virtual address of "consistent" buffer. 
- * - * See Documentation/core-api/dma-api-howto.rst - */ -static void sba_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - sba_unmap_page(dev, dma_handle, size, 0, 0); - free_pages((unsigned long) vaddr, get_order(size)); -} - - -/* -** Since 0 is a valid pdir_base index value, can't use that -** to determine if a value is valid or not. Use a flag to indicate -** the SG list entry contains a valid pdir index. -*/ -#define PIDE_FLAG 0x1UL - -#ifdef DEBUG_LARGE_SG_ENTRIES -int dump_run_sg = 0; -#endif - - -/** - * sba_fill_pdir - write allocated SG entries into IO PDIR - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @startsg: list of IOVA/size pairs - * @nents: number of entries in startsg list - * - * Take preprocessed SG list and write corresponding entries - * in the IO PDIR. - */ - -static SBA_INLINE int -sba_fill_pdir( - struct ioc *ioc, - struct scatterlist *startsg, - int nents) -{ - struct scatterlist *dma_sg = startsg; /* pointer to current DMA */ - int n_mappings = 0; - u64 *pdirp = NULL; - unsigned long dma_offset = 0; - - while (nents-- > 0) { - int cnt = startsg->dma_length; - startsg->dma_length = 0; - -#ifdef DEBUG_LARGE_SG_ENTRIES - if (dump_run_sg) - printk(" %2d : %08lx/%05x %p\n", - nents, startsg->dma_address, cnt, - sba_sg_address(startsg)); -#else - DBG_RUN_SG(" %d : %08lx/%05x %p\n", - nents, startsg->dma_address, cnt, - sba_sg_address(startsg)); -#endif - /* - ** Look for the start of a new DMA stream - */ - if (startsg->dma_address & PIDE_FLAG) { - u32 pide = startsg->dma_address & ~PIDE_FLAG; - dma_offset = (unsigned long) pide & ~iovp_mask; - startsg->dma_address = 0; - if (n_mappings) - dma_sg = sg_next(dma_sg); - dma_sg->dma_address = pide | ioc->ibase; - pdirp = &(ioc->pdir_base[pide >> iovp_shift]); - n_mappings++; - } - - /* - ** Look for a VCONTIG chunk - */ - if (cnt) { - unsigned long vaddr = (unsigned long) sba_sg_address(startsg); - 
ASSERT(pdirp); - - /* Since multiple Vcontig blocks could make up - ** one DMA stream, *add* cnt to dma_len. - */ - dma_sg->dma_length += cnt; - cnt += dma_offset; - dma_offset=0; /* only want offset on first chunk */ - cnt = ROUNDUP(cnt, iovp_size); - do { - sba_io_pdir_entry(pdirp, vaddr); - vaddr += iovp_size; - cnt -= iovp_size; - pdirp++; - } while (cnt > 0); - } - startsg = sg_next(startsg); - } - /* force pdir update */ - wmb(); - -#ifdef DEBUG_LARGE_SG_ENTRIES - dump_run_sg = 0; -#endif - return(n_mappings); -} - - -/* -** Two address ranges are DMA contiguous *iff* "end of prev" and -** "start of next" are both on an IOV page boundary. -** -** (shift left is a quick trick to mask off upper bits) -*/ -#define DMA_CONTIG(__X, __Y) \ - (((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL) - - -/** - * sba_coalesce_chunks - preprocess the SG list - * @ioc: IO MMU structure which owns the pdir we are interested in. - * @startsg: list of IOVA/size pairs - * @nents: number of entries in startsg list - * - * First pass is to walk the SG list and determine where the breaks are - * in the DMA stream. Allocates PDIR entries but does not fill them. - * Returns the number of DMA chunks. - * - * Doing the fill separate from the coalescing/allocation keeps the - * code simpler. Future enhancement could make one pass through - * the sglist do both. 
- */ -static SBA_INLINE int -sba_coalesce_chunks(struct ioc *ioc, struct device *dev, - struct scatterlist *startsg, - int nents) -{ - struct scatterlist *vcontig_sg; /* VCONTIG chunk head */ - unsigned long vcontig_len; /* len of VCONTIG chunk */ - unsigned long vcontig_end; - struct scatterlist *dma_sg; /* next DMA stream head */ - unsigned long dma_offset, dma_len; /* start/len of DMA stream */ - int n_mappings = 0; - unsigned int max_seg_size = dma_get_max_seg_size(dev); - int idx; - - while (nents > 0) { - unsigned long vaddr = (unsigned long) sba_sg_address(startsg); - - /* - ** Prepare for first/next DMA stream - */ - dma_sg = vcontig_sg = startsg; - dma_len = vcontig_len = vcontig_end = startsg->length; - vcontig_end += vaddr; - dma_offset = vaddr & ~iovp_mask; - - /* PARANOID: clear entries */ - startsg->dma_address = startsg->dma_length = 0; - - /* - ** This loop terminates one iteration "early" since - ** it's always looking one "ahead". - */ - while (--nents > 0) { - unsigned long vaddr; /* tmp */ - - startsg = sg_next(startsg); - - /* PARANOID */ - startsg->dma_address = startsg->dma_length = 0; - - /* catch brokenness in SCSI layer */ - ASSERT(startsg->length <= DMA_CHUNK_SIZE); - - /* - ** First make sure current dma stream won't - ** exceed DMA_CHUNK_SIZE if we coalesce the - ** next entry. - */ - if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask) - > DMA_CHUNK_SIZE) - break; - - if (dma_len + startsg->length > max_seg_size) - break; - - /* - ** Then look for virtually contiguous blocks. - ** - ** append the next transaction? - */ - vaddr = (unsigned long) sba_sg_address(startsg); - if (vcontig_end == vaddr) - { - vcontig_len += startsg->length; - vcontig_end += startsg->length; - dma_len += startsg->length; - continue; - } - -#ifdef DEBUG_LARGE_SG_ENTRIES - dump_run_sg = (vcontig_len > iovp_size); -#endif - - /* - ** Not virtually contiguous. - ** Terminate prev chunk. - ** Start a new chunk. 
- ** - ** Once we start a new VCONTIG chunk, dma_offset - ** can't change. And we need the offset from the first - ** chunk - not the last one. Ergo Successive chunks - ** must start on page boundaries and dove tail - ** with it's predecessor. - */ - vcontig_sg->dma_length = vcontig_len; - - vcontig_sg = startsg; - vcontig_len = startsg->length; - - /* - ** 3) do the entries end/start on page boundaries? - ** Don't update vcontig_end until we've checked. - */ - if (DMA_CONTIG(vcontig_end, vaddr)) - { - vcontig_end = vcontig_len + vaddr; - dma_len += vcontig_len; - continue; - } else { - break; - } - } - - /* - ** End of DMA Stream - ** Terminate last VCONTIG block. - ** Allocate space for DMA stream. - */ - vcontig_sg->dma_length = vcontig_len; - dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask; - ASSERT(dma_len <= DMA_CHUNK_SIZE); - idx = sba_alloc_range(ioc, dev, dma_len); - if (idx < 0) { - dma_sg->dma_length = 0; - return -1; - } - dma_sg->dma_address = (dma_addr_t)(PIDE_FLAG | (idx << iovp_shift) - | dma_offset); - n_mappings++; - } - - return n_mappings; -} - -static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, - unsigned long attrs); -/** - * sba_map_sg - map Scatter/Gather list - * @dev: instance of PCI owned by the driver that's asking. - * @sglist: array of buffer/length pairs - * @nents: number of entries in list - * @dir: R/W or both. 
- * @attrs: optional dma attributes - * - * See Documentation/core-api/dma-api-howto.rst - */ -static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct ioc *ioc; - int coalesced, filled = 0; -#ifdef ASSERT_PDIR_SANITY - unsigned long flags; -#endif -#ifdef ALLOW_IOV_BYPASS_SG - struct scatterlist *sg; -#endif - - DBG_RUN_SG("%s() START %d entries\n", __func__, nents); - ioc = GET_IOC(dev); - ASSERT(ioc); - -#ifdef ALLOW_IOV_BYPASS_SG - ASSERT(to_pci_dev(dev)->dma_mask); - if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) { - for_each_sg(sglist, sg, nents, filled) { - sg->dma_length = sg->length; - sg->dma_address = virt_to_phys(sba_sg_address(sg)); - } - return filled; - } -#endif - /* Fast path single entry scatterlists. */ - if (nents == 1) { - sglist->dma_length = sglist->length; - sglist->dma_address = sba_map_page(dev, sg_page(sglist), - sglist->offset, sglist->length, dir, attrs); - if (dma_mapping_error(dev, sglist->dma_address)) - return -EIO; - return 1; - } - -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check before sba_map_sg_attrs()")) - { - sba_dump_sg(ioc, sglist, nents); - panic("Check before sba_map_sg_attrs()"); - } - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - - prefetch(ioc->res_hint); - - /* - ** First coalesce the chunks and allocate I/O pdir space - ** - ** If this is one DMA stream, we can properly map using the - ** correct virtual address associated with each DMA page. - ** w/o this association, we wouldn't have coherent DMA! - ** Access to the virtual address is what forces a two pass algorithm. 
- */ - coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents); - if (coalesced < 0) { - sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs); - return -ENOMEM; - } - - /* - ** Program the I/O Pdir - ** - ** map the virtual addresses to the I/O Pdir - ** o dma_address will contain the pdir index - ** o dma_len will contain the number of bytes to map - ** o address contains the virtual address. - */ - filled = sba_fill_pdir(ioc, sglist, nents); - -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check after sba_map_sg_attrs()")) - { - sba_dump_sg(ioc, sglist, nents); - panic("Check after sba_map_sg_attrs()\n"); - } - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - - ASSERT(coalesced == filled); - DBG_RUN_SG("%s() DONE %d mappings\n", __func__, filled); - - return filled; -} - -/** - * sba_unmap_sg_attrs - unmap Scatter/Gather list - * @dev: instance of PCI owned by the driver that's asking. - * @sglist: array of buffer/length pairs - * @nents: number of entries in list - * @dir: R/W or both. 
- * @attrs: optional dma attributes - * - * See Documentation/core-api/dma-api-howto.rst - */ -static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -#ifdef ASSERT_PDIR_SANITY - struct ioc *ioc; - unsigned long flags; -#endif - - DBG_RUN_SG("%s() START %d entries, %p,%x\n", - __func__, nents, sba_sg_address(sglist), sglist->length); - -#ifdef ASSERT_PDIR_SANITY - ioc = GET_IOC(dev); - ASSERT(ioc); - - spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check before sba_unmap_sg_attrs()"); - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - - while (nents && sglist->dma_length) { - - sba_unmap_page(dev, sglist->dma_address, sglist->dma_length, - dir, attrs); - sglist = sg_next(sglist); - nents--; - } - - DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); - -#ifdef ASSERT_PDIR_SANITY - spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check after sba_unmap_sg_attrs()"); - spin_unlock_irqrestore(&ioc->res_lock, flags); -#endif - -} - -/************************************************************** -* -* Initialization and claim -* -***************************************************************/ - -static void -ioc_iova_init(struct ioc *ioc) -{ - int tcnfg; - int agp_found = 0; - struct pci_dev *device = NULL; -#ifdef FULL_VALID_PDIR - unsigned long index; -#endif - - /* - ** Firmware programs the base and size of a "safe IOVA space" - ** (one that doesn't overlap memory or LMMIO space) in the - ** IBASE and IMASK registers. 
- */ - ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL; - ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL; - - ioc->iov_size = ~ioc->imask + 1; - - DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n", - __func__, ioc->ioc_hpa, ioc->ibase, ioc->imask, - ioc->iov_size >> 20); - - switch (iovp_size) { - case 4*1024: tcnfg = 0; break; - case 8*1024: tcnfg = 1; break; - case 16*1024: tcnfg = 2; break; - case 64*1024: tcnfg = 3; break; - default: - panic(PFX "Unsupported IOTLB page size %ldK", - iovp_size >> 10); - break; - } - WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG); - - ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE; - ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL, - get_order(ioc->pdir_size)); - if (!ioc->pdir_base) - panic(PFX "Couldn't allocate I/O Page Table\n"); - - memset(ioc->pdir_base, 0, ioc->pdir_size); - - DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __func__, - iovp_size >> 10, ioc->pdir_base, ioc->pdir_size); - - ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base); - WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE); - - /* - ** If an AGP device is present, only use half of the IOV space - ** for PCI DMA. Unfortunately we can't know ahead of time - ** whether GART support will actually be used, for now we - ** can just key on an AGP device found in the system. - ** We program the next pdir index after we stop w/ a key for - ** the GART code to handshake on. 
- */ - for_each_pci_dev(device) - agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP); - - if (agp_found && reserve_sba_gart) { - printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n", - ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2); - ioc->pdir_size /= 2; - ((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE; - } -#ifdef FULL_VALID_PDIR - /* - ** Check to see if the spill page has been allocated, we don't need more than - ** one across multiple SBAs. - */ - if (!prefetch_spill_page) { - char *spill_poison = "SBAIOMMU POISON"; - int poison_size = 16; - void *poison_addr, *addr; - - addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size)); - if (!addr) - panic(PFX "Couldn't allocate PDIR spill page\n"); - - poison_addr = addr; - for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size) - memcpy(poison_addr, spill_poison, poison_size); - - prefetch_spill_page = virt_to_phys(addr); - - DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __func__, prefetch_spill_page); - } - /* - ** Set all the PDIR entries valid w/ the spill page as the target - */ - for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++) - ((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page); -#endif - - /* Clear I/O TLB of any possible entries */ - WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM); - READ_REG(ioc->ioc_hpa + IOC_PCOM); - - /* Enable IOVA translation */ - WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE); - READ_REG(ioc->ioc_hpa + IOC_IBASE); -} - -static void __init -ioc_resource_init(struct ioc *ioc) -{ - spin_lock_init(&ioc->res_lock); -#if DELAYED_RESOURCE_CNT > 0 - spin_lock_init(&ioc->saved_lock); -#endif - - /* resource map size dictated by pdir_size */ - ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */ - ioc->res_size >>= 3; /* convert bit count to byte count */ - DBG_INIT("%s() res_size 0x%x\n", 
__func__, ioc->res_size); - - ioc->res_map = (char *) __get_free_pages(GFP_KERNEL, - get_order(ioc->res_size)); - if (!ioc->res_map) - panic(PFX "Couldn't allocate resource map\n"); - - memset(ioc->res_map, 0, ioc->res_size); - /* next available IOVP - circular search */ - ioc->res_hint = (unsigned long *) ioc->res_map; - -#ifdef ASSERT_PDIR_SANITY - /* Mark first bit busy - ie no IOVA 0 */ - ioc->res_map[0] = 0x1; - ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE; -#endif -#ifdef FULL_VALID_PDIR - /* Mark the last resource used so we don't prefetch beyond IOVA space */ - ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */ - ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF - | prefetch_spill_page); -#endif - - DBG_INIT("%s() res_map %x %p\n", __func__, - ioc->res_size, (void *) ioc->res_map); -} - -static void __init -ioc_sac_init(struct ioc *ioc) -{ - struct pci_dev *sac = NULL; - struct pci_controller *controller = NULL; - - /* - * pci_alloc_coherent() must return a DMA address which is - * SAC (single address cycle) addressable, so allocate a - * pseudo-device to enforce that. - */ - sac = kzalloc(sizeof(*sac), GFP_KERNEL); - if (!sac) - panic(PFX "Couldn't allocate struct pci_dev"); - - controller = kzalloc(sizeof(*controller), GFP_KERNEL); - if (!controller) - panic(PFX "Couldn't allocate struct pci_controller"); - - controller->iommu = ioc; - sac->sysdata = controller; - sac->dma_mask = 0xFFFFFFFFUL; - sac->dev.bus = &pci_bus_type; - ioc->sac_only_dev = sac; -} - -static void __init -ioc_zx1_init(struct ioc *ioc) -{ - unsigned long rope_config; - unsigned int i; - - if (ioc->rev < 0x20) - panic(PFX "IOC 2.0 or later required for IOMMU support\n"); - - /* 38 bit memory controller + extra bit for range displaced by MMIO */ - ioc->dma_mask = (0x1UL << 39) - 1; - - /* - ** Clear ROPE(N)_CONFIG AO bit. - ** Disables "NT Ordering" (~= !"Relaxed Ordering") - ** Overrides bit 1 in DMA Hint Sets. 
- ** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701. - */ - for (i=0; i<(8*8); i+=8) { - rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i); - rope_config &= ~IOC_ROPE_AO; - WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i); - } -} - -typedef void (initfunc)(struct ioc *); - -struct ioc_iommu { - u32 func_id; - char *name; - initfunc *init; -}; - -static struct ioc_iommu ioc_iommu_info[] __initdata = { - { ZX1_IOC_ID, "zx1", ioc_zx1_init }, - { ZX2_IOC_ID, "zx2", NULL }, - { SX1000_IOC_ID, "sx1000", NULL }, - { SX2000_IOC_ID, "sx2000", NULL }, -}; - -static void __init ioc_init(unsigned long hpa, struct ioc *ioc) -{ - struct ioc_iommu *info; - - ioc->next = ioc_list; - ioc_list = ioc; - - ioc->ioc_hpa = ioremap(hpa, 0x1000); - - ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID); - ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL; - ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL; /* conservative */ - - for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) { - if (ioc->func_id == info->func_id) { - ioc->name = info->name; - if (info->init) - (info->init)(ioc); - } - } - - iovp_size = (1 << iovp_shift); - iovp_mask = ~(iovp_size - 1); - - DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __func__, - PAGE_SIZE >> 10, iovp_size >> 10); - - if (!ioc->name) { - ioc->name = kmalloc(24, GFP_KERNEL); - if (ioc->name) - sprintf((char *) ioc->name, "Unknown (%04x:%04x)", - ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF); - else - ioc->name = "Unknown"; - } - - ioc_iova_init(ioc); - ioc_resource_init(ioc); - ioc_sac_init(ioc); - - printk(KERN_INFO PFX - "%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n", - ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF, - hpa, ioc->iov_size >> 20, ioc->ibase); -} - - - -/************************************************************************** -** -** SBA initialization code (HW and SW) -** -** o identify SBA chip itself -** o FIXME: initialize DMA hints for reasonable defaults -** 
-**************************************************************************/ - -#ifdef CONFIG_PROC_FS -static void * -ioc_start(struct seq_file *s, loff_t *pos) -{ - struct ioc *ioc; - loff_t n = *pos; - - for (ioc = ioc_list; ioc; ioc = ioc->next) - if (!n--) - return ioc; - - return NULL; -} - -static void * -ioc_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct ioc *ioc = v; - - ++*pos; - return ioc->next; -} - -static void -ioc_stop(struct seq_file *s, void *v) -{ -} - -static int -ioc_show(struct seq_file *s, void *v) -{ - struct ioc *ioc = v; - unsigned long *res_ptr = (unsigned long *)ioc->res_map; - int i, used = 0; - - seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n", - ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF)); -#ifdef CONFIG_NUMA - if (ioc->node != NUMA_NO_NODE) - seq_printf(s, "NUMA node : %d\n", ioc->node); -#endif - seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024)); - seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024); - - for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr) - used += hweight64(*res_ptr); - - seq_printf(s, "PDIR size : %d entries\n", ioc->pdir_size >> 3); - seq_printf(s, "PDIR used : %d entries\n", used); - -#ifdef PDIR_SEARCH_TIMING - { - unsigned long i = 0, avg = 0, min, max; - min = max = ioc->avg_search[0]; - for (i = 0; i < SBA_SEARCH_SAMPLE; i++) { - avg += ioc->avg_search[i]; - if (ioc->avg_search[i] > max) max = ioc->avg_search[i]; - if (ioc->avg_search[i] < min) min = ioc->avg_search[i]; - } - avg /= SBA_SEARCH_SAMPLE; - seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n", - min, avg, max); - } -#endif -#ifndef ALLOW_IOV_BYPASS - seq_printf(s, "IOVA bypass disabled\n"); -#endif - return 0; -} - -static const struct seq_operations ioc_seq_ops = { - .start = ioc_start, - .next = ioc_next, - .stop = ioc_stop, - .show = ioc_show -}; - -static void __init -ioc_proc_init(void) -{ - struct proc_dir_entry *dir; - - 
dir = proc_mkdir("bus/mckinley", NULL); - if (!dir) - return; - - proc_create_seq(ioc_list->name, 0, dir, &ioc_seq_ops); -} -#endif - -static void -sba_connect_bus(struct pci_bus *bus) -{ - acpi_handle handle, parent; - acpi_status status; - struct ioc *ioc; - - if (!PCI_CONTROLLER(bus)) - panic(PFX "no sysdata on bus %d!\n", bus->number); - - if (PCI_CONTROLLER(bus)->iommu) - return; - - handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion); - if (!handle) - return; - - /* - * The IOC scope encloses PCI root bridges in the ACPI - * namespace, so work our way out until we find an IOC we - * claimed previously. - */ - do { - for (ioc = ioc_list; ioc; ioc = ioc->next) - if (ioc->handle == handle) { - PCI_CONTROLLER(bus)->iommu = ioc; - return; - } - - status = acpi_get_parent(handle, &parent); - handle = parent; - } while (ACPI_SUCCESS(status)); - - printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number); -} - -static void __init -sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) -{ -#ifdef CONFIG_NUMA - unsigned int node; - - node = acpi_get_node(handle); - if (node != NUMA_NO_NODE && !node_online(node)) - node = NUMA_NO_NODE; - - ioc->node = node; -#endif -} - -static void __init acpi_sba_ioc_add(struct ioc *ioc) -{ - acpi_handle handle = ioc->handle; - acpi_status status; - u64 hpa, length; - struct acpi_device_info *adi; - - ioc_found = ioc->next; - status = hp_acpi_csr_space(handle, &hpa, &length); - if (ACPI_FAILURE(status)) - goto err; - - status = acpi_get_object_info(handle, &adi); - if (ACPI_FAILURE(status)) - goto err; - - /* - * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI - * root bridges, and its CSR space includes the IOC function. 
- */ - if (strncmp("HWP0001", adi->hardware_id.string, 7) == 0) { - hpa += ZX1_IOC_OFFSET; - /* zx1 based systems default to kernel page size iommu pages */ - if (!iovp_shift) - iovp_shift = min(PAGE_SHIFT, 16); - } - kfree(adi); - - /* - * default anything not caught above or specified on cmdline to 4k - * iommu page size - */ - if (!iovp_shift) - iovp_shift = 12; - - ioc_init(hpa, ioc); - /* setup NUMA node association */ - sba_map_ioc_to_node(ioc, handle); - return; - - err: - kfree(ioc); -} - -static const struct acpi_device_id hp_ioc_iommu_device_ids[] = { - {"HWP0001", 0}, - {"HWP0004", 0}, - {"", 0}, -}; - -static int acpi_sba_ioc_attach(struct acpi_device *device, - const struct acpi_device_id *not_used) -{ - struct ioc *ioc; - - ioc = kzalloc(sizeof(*ioc), GFP_KERNEL); - if (!ioc) - return -ENOMEM; - - ioc->next = ioc_found; - ioc_found = ioc; - ioc->handle = device->handle; - return 1; -} - - -static struct acpi_scan_handler acpi_sba_ioc_handler = { - .ids = hp_ioc_iommu_device_ids, - .attach = acpi_sba_ioc_attach, -}; - -static int __init acpi_sba_ioc_init_acpi(void) -{ - return acpi_scan_add_handler(&acpi_sba_ioc_handler); -} -/* This has to run before acpi_scan_init(). 
*/ -arch_initcall(acpi_sba_ioc_init_acpi); - -static int sba_dma_supported (struct device *dev, u64 mask) -{ - /* make sure it's at least 32bit capable */ - return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL); -} - -static const struct dma_map_ops sba_dma_ops = { - .alloc = sba_alloc_coherent, - .free = sba_free_coherent, - .map_page = sba_map_page, - .unmap_page = sba_unmap_page, - .map_sg = sba_map_sg_attrs, - .unmap_sg = sba_unmap_sg_attrs, - .dma_supported = sba_dma_supported, - .mmap = dma_common_mmap, - .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, -}; - -static int __init -sba_init(void) -{ - /* - * If we are booting a kdump kernel, the sba_iommu will cause devices - * that were not shutdown properly to MCA as soon as they are turned - * back on. Our only option for a successful kdump kernel boot is to - * use swiotlb. - */ - if (is_kdump_kernel()) - return 0; - - /* - * ioc_found should be populated by the acpi_sba_ioc_handler's .attach() - * routine, but that only happens if acpi_scan_init() has already run. - */ - while (ioc_found) - acpi_sba_ioc_add(ioc_found); - - if (!ioc_list) - return 0; - - { - struct pci_bus *b = NULL; - while ((b = pci_find_next_bus(b)) != NULL) - sba_connect_bus(b); - } - - /* no need for swiotlb with the iommu */ - swiotlb_exit(); - dma_ops = &sba_dma_ops; - -#ifdef CONFIG_PROC_FS - ioc_proc_init(); -#endif - return 0; -} - -subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... 
*/ - -static int __init -nosbagart(char *str) -{ - reserve_sba_gart = 0; - return 1; -} - -__setup("nosbagart", nosbagart); - -static int __init -sba_page_override(char *str) -{ - unsigned long page_size; - - page_size = memparse(str, &str); - switch (page_size) { - case 4096: - case 8192: - case 16384: - case 65536: - iovp_shift = ffs(page_size) - 1; - break; - default: - printk("%s: unknown/unsupported iommu page size %ld\n", - __func__, page_size); - } - - return 1; -} - -__setup("sbapagesize=",sba_page_override); diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild deleted file mode 100644 index aefae2efde9f..000000000000 --- a/arch/ia64/include/asm/Kbuild +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -generated-y += syscall_table.h -generic-y += agp.h -generic-y += kvm_para.h -generic-y += mcs_spinlock.h -generic-y += vtime.h diff --git a/arch/ia64/include/asm/acenv.h b/arch/ia64/include/asm/acenv.h deleted file mode 100644 index 9d673cd4c2ad..000000000000 --- a/arch/ia64/include/asm/acenv.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * IA64 specific ACPICA environments and implementation - * - * Copyright (C) 2014, Intel Corporation - * Author: Lv Zheng - */ - -#ifndef _ASM_IA64_ACENV_H -#define _ASM_IA64_ACENV_H - -#include - -#define COMPILER_DEPENDENT_INT64 long -#define COMPILER_DEPENDENT_UINT64 unsigned long - -/* Asm macros */ - -static inline int -ia64_acpi_acquire_global_lock(unsigned int *lock) -{ - unsigned int old, new, val; - do { - old = *lock; - new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); - val = ia64_cmpxchg4_acq(lock, new, old); - } while (unlikely (val != old)); - return (new < 3) ? 
-1 : 0; -} - -static inline int -ia64_acpi_release_global_lock(unsigned int *lock) -{ - unsigned int old, new, val; - do { - old = *lock; - new = old & ~0x3; - val = ia64_cmpxchg4_acq(lock, new, old); - } while (unlikely (val != old)); - return old & 0x1; -} - -#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ - ((Acq) = ia64_acpi_acquire_global_lock(&facs->global_lock)) - -#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ - ((Acq) = ia64_acpi_release_global_lock(&facs->global_lock)) - -#endif /* _ASM_IA64_ACENV_H */ diff --git a/arch/ia64/include/asm/acpi-ext.h b/arch/ia64/include/asm/acpi-ext.h deleted file mode 100644 index eaa57583d151..000000000000 --- a/arch/ia64/include/asm/acpi-ext.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P. - * Alex Williamson - * Bjorn Helgaas - * - * Vendor specific extensions to ACPI. - */ - -#ifndef _ASM_IA64_ACPI_EXT_H -#define _ASM_IA64_ACPI_EXT_H - -#include - -extern acpi_status hp_acpi_csr_space (acpi_handle, u64 *base, u64 *length); - -#endif /* _ASM_IA64_ACPI_EXT_H */ diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h deleted file mode 100644 index 58500a964238..000000000000 --- a/arch/ia64/include/asm/acpi.h +++ /dev/null @@ -1,110 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 2000,2001 J.I. 
Lee - * Copyright (C) 2001,2002 Paul Diefenbaugh - */ - -#ifndef _ASM_ACPI_H -#define _ASM_ACPI_H - -#ifdef __KERNEL__ - -#include - -#include -#include -#include - - -extern int acpi_lapic; -#define acpi_disabled 0 /* ACPI always enabled on IA64 */ -#define acpi_noirq 0 /* ACPI always enabled on IA64 */ -#define acpi_pci_disabled 0 /* ACPI PCI always enabled on IA64 */ -#define acpi_strict 1 /* no ACPI spec workarounds on IA64 */ - -static inline bool acpi_has_cpu_in_madt(void) -{ - return !!acpi_lapic; -} - -#define acpi_processor_cstate_check(x) (x) /* no idle limits on IA64 :) */ -static inline void disable_acpi(void) { } - -int acpi_request_vector (u32 int_type); -int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); - -/* Low-level suspend routine. */ -extern int acpi_suspend_lowlevel(void); - -static inline unsigned long acpi_get_wakeup_address(void) -{ - return 0; -} - -/* - * Record the cpei override flag and current logical cpu. This is - * useful for CPU removal. - */ -extern unsigned int can_cpei_retarget(void); -extern unsigned int is_cpu_cpei_target(unsigned int cpu); -extern void set_cpei_target_cpu(unsigned int cpu); -extern unsigned int get_cpei_target_cpu(void); -extern void prefill_possible_map(void); -#ifdef CONFIG_ACPI_HOTPLUG_CPU -extern int additional_cpus; -#else -#define additional_cpus 0 -#endif - -#ifdef CONFIG_ACPI_NUMA -#if MAX_NUMNODES > 256 -#define MAX_PXM_DOMAINS MAX_NUMNODES -#else -#define MAX_PXM_DOMAINS (256) -#endif -extern int pxm_to_nid_map[MAX_PXM_DOMAINS]; -extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; -#endif - -static inline bool arch_has_acpi_pdc(void) { return true; } -static inline void arch_acpi_set_proc_cap_bits(u32 *cap) -{ - *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SMP; -} - -#ifdef CONFIG_ACPI_NUMA -extern cpumask_t early_cpu_possible_map; -#define for_each_possible_early_cpu(cpu) \ - for_each_cpu((cpu), &early_cpu_possible_map) - -static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus) -{ - int 
low_cpu, high_cpu; - int cpu; - int next_nid = 0; - - low_cpu = cpumask_weight(&early_cpu_possible_map); - - high_cpu = max(low_cpu, min_cpus); - high_cpu = min(high_cpu + reserve_cpus, NR_CPUS); - - for (cpu = low_cpu; cpu < high_cpu; cpu++) { - cpumask_set_cpu(cpu, &early_cpu_possible_map); - if (node_cpuid[cpu].nid == NUMA_NO_NODE) { - node_cpuid[cpu].nid = next_nid; - next_nid++; - if (next_nid >= num_online_nodes()) - next_nid = 0; - } - } -} - -extern void acpi_numa_fixup(void); - -#endif /* CONFIG_ACPI_NUMA */ - -#endif /*__KERNEL__*/ - -#endif /*_ASM_ACPI_H*/ diff --git a/arch/ia64/include/asm/asm-offsets.h b/arch/ia64/include/asm/asm-offsets.h deleted file mode 100644 index d370ee36a182..000000000000 --- a/arch/ia64/include/asm/asm-offsets.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/ia64/include/asm/asm-prototypes.h b/arch/ia64/include/asm/asm-prototypes.h deleted file mode 100644 index a96689447a74..000000000000 --- a/arch/ia64/include/asm/asm-prototypes.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_ASM_PROTOTYPES_H -#define _ASM_IA64_ASM_PROTOTYPES_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern const char ia64_ivt[]; - -signed int __divsi3(signed int, unsigned int); -signed int __modsi3(signed int, unsigned int); - -signed long long __divdi3(signed long long, unsigned long long); -signed long long __moddi3(signed long long, unsigned long long); - -unsigned int __udivsi3(unsigned int, unsigned int); -unsigned int __umodsi3(unsigned int, unsigned int); - -unsigned long long __udivdi3(unsigned long long, unsigned long long); -unsigned long long __umoddi3(unsigned long long, unsigned long long); - -#endif /* _ASM_IA64_ASM_PROTOTYPES_H */ diff --git a/arch/ia64/include/asm/asmmacro.h b/arch/ia64/include/asm/asmmacro.h deleted file mode 100644 index 52619c517f09..000000000000 --- a/arch/ia64/include/asm/asmmacro.h +++ /dev/null @@ 
-1,136 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_ASMMACRO_H -#define _ASM_IA64_ASMMACRO_H - -/* - * Copyright (C) 2000-2001, 2003-2004 Hewlett-Packard Co - * David Mosberger-Tang - */ - - -#define ENTRY(name) \ - .align 32; \ - .proc name; \ -name: - -#define ENTRY_MIN_ALIGN(name) \ - .align 16; \ - .proc name; \ -name: - -#define GLOBAL_ENTRY(name) \ - .global name; \ - ENTRY(name) - -#define END(name) \ - .endp name - -/* - * Helper macros to make unwind directives more readable: - */ - -/* prologue_gr: */ -#define ASM_UNW_PRLG_RP 0x8 -#define ASM_UNW_PRLG_PFS 0x4 -#define ASM_UNW_PRLG_PSP 0x2 -#define ASM_UNW_PRLG_PR 0x1 -#define ASM_UNW_PRLG_GRSAVE(ninputs) (32+(ninputs)) - -/* - * Helper macros for accessing user memory. - * - * When adding any new .section/.previous entries here, make sure to - * also add it to the DISCARD section in arch/ia64/kernel/gate.lds.S or - * unpleasant things will happen. - */ - - .section "__ex_table", "a" // declare section & section attributes - .previous - -# define EX(y,x...) \ - .xdata4 "__ex_table", 99f-., y-.; \ - [99:] x -# define EXCLR(y,x...) \ - .xdata4 "__ex_table", 99f-., y-.+4; \ - [99:] x - -/* - * Tag MCA recoverable instruction ranges. - */ - - .section "__mca_table", "a" // declare section & section attributes - .previous - -# define MCA_RECOVER_RANGE(y) \ - .xdata4 "__mca_table", y-., 99f-.; \ - [99:] - -/* - * Mark instructions that need a load of a virtual address patched to be - * a load of a physical address. We use this either in critical performance - * path (ivt.S - TLB miss processing) or in places where it might not be - * safe to use a "tpa" instruction (mca_asm.S - error recovery). - */ - .section ".data..patch.vtop", "a" // declare section & section attributes - .previous - -#define LOAD_PHYSICAL(pr, reg, obj) \ -[1:](pr)movl reg = obj; \ - .xdata4 ".data..patch.vtop", 1b-. - -/* - * For now, we always put in the McKinley E9 workaround. 
On CPUs that don't need it, - * we'll patch out the work-around bundles with NOPs, so their impact is minimal. - */ -#define DO_MCKINLEY_E9_WORKAROUND - -#ifdef DO_MCKINLEY_E9_WORKAROUND - .section ".data..patch.mckinley_e9", "a" - .previous -/* workaround for Itanium 2 Errata 9: */ -# define FSYS_RETURN \ - .xdata4 ".data..patch.mckinley_e9", 1f-.; \ -1:{ .mib; \ - nop.m 0; \ - mov r16=ar.pfs; \ - br.call.sptk.many b7=2f;; \ - }; \ -2:{ .mib; \ - nop.m 0; \ - mov ar.pfs=r16; \ - br.ret.sptk.many b6;; \ - } -#else -# define FSYS_RETURN br.ret.sptk.many b6 -#endif - -/* - * If physical stack register size is different from DEF_NUM_STACK_REG, - * dynamically patch the kernel for correct size. - */ - .section ".data..patch.phys_stack_reg", "a" - .previous -#define LOAD_PHYS_STACK_REG_SIZE(reg) \ -[1:] adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0; \ - .xdata4 ".data..patch.phys_stack_reg", 1b-. - -/* - * Up until early 2004, use of .align within a function caused bad unwind info. - * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing - * otherwise. - */ -#ifdef HAVE_WORKING_TEXT_ALIGN -# define TEXT_ALIGN(n) .align n -#else -# define TEXT_ALIGN(n) -#endif - -#ifdef HAVE_SERIALIZE_DIRECTIVE -# define dv_serialize_data .serialize.data -# define dv_serialize_instruction .serialize.instruction -#else -# define dv_serialize_data -# define dv_serialize_instruction -#endif - -#endif /* _ASM_IA64_ASMMACRO_H */ diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h deleted file mode 100644 index 6540a628d257..000000000000 --- a/arch/ia64/include/asm/atomic.h +++ /dev/null @@ -1,216 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_ATOMIC_H -#define _ASM_IA64_ATOMIC_H - -/* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - * - * NOTE: don't mess with the types below! 
The "unsigned long" and - * "int" types were carefully placed so as to ensure proper operation - * of the macros. - * - * Copyright (C) 1998, 1999, 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ -#include - -#include -#include - - -#define ATOMIC64_INIT(i) { (i) } - -#define arch_atomic_read(v) READ_ONCE((v)->counter) -#define arch_atomic64_read(v) READ_ONCE((v)->counter) - -#define arch_atomic_set(v,i) WRITE_ONCE(((v)->counter), (i)) -#define arch_atomic64_set(v,i) WRITE_ONCE(((v)->counter), (i)) - -#define ATOMIC_OP(op, c_op) \ -static __inline__ int \ -ia64_atomic_##op (int i, atomic_t *v) \ -{ \ - __s32 old, new; \ - CMPXCHG_BUGCHECK_DECL \ - \ - do { \ - CMPXCHG_BUGCHECK(v); \ - old = arch_atomic_read(v); \ - new = old c_op i; \ - } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ - return new; \ -} - -#define ATOMIC_FETCH_OP(op, c_op) \ -static __inline__ int \ -ia64_atomic_fetch_##op (int i, atomic_t *v) \ -{ \ - __s32 old, new; \ - CMPXCHG_BUGCHECK_DECL \ - \ - do { \ - CMPXCHG_BUGCHECK(v); \ - old = arch_atomic_read(v); \ - new = old c_op i; \ - } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ - return old; \ -} - -#define ATOMIC_OPS(op, c_op) \ - ATOMIC_OP(op, c_op) \ - ATOMIC_FETCH_OP(op, c_op) - -ATOMIC_OPS(add, +) -ATOMIC_OPS(sub, -) - -#ifdef __OPTIMIZE__ -#define __ia64_atomic_const(i) \ - static const int __ia64_atomic_p = __builtin_constant_p(i) ? \ - ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 || \ - (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0;\ - __ia64_atomic_p -#else -#define __ia64_atomic_const(i) 0 -#endif - -#define arch_atomic_add_return(i,v) \ -({ \ - int __ia64_aar_i = (i); \ - __ia64_atomic_const(i) \ - ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ - : ia64_atomic_add(__ia64_aar_i, v); \ -}) - -#define arch_atomic_sub_return(i,v) \ -({ \ - int __ia64_asr_i = (i); \ - __ia64_atomic_const(i) \ - ? 
ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ - : ia64_atomic_sub(__ia64_asr_i, v); \ -}) - -#define arch_atomic_fetch_add(i,v) \ -({ \ - int __ia64_aar_i = (i); \ - __ia64_atomic_const(i) \ - ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ - : ia64_atomic_fetch_add(__ia64_aar_i, v); \ -}) - -#define arch_atomic_fetch_sub(i,v) \ -({ \ - int __ia64_asr_i = (i); \ - __ia64_atomic_const(i) \ - ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ - : ia64_atomic_fetch_sub(__ia64_asr_i, v); \ -}) - -ATOMIC_FETCH_OP(and, &) -ATOMIC_FETCH_OP(or, |) -ATOMIC_FETCH_OP(xor, ^) - -#define arch_atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v) -#define arch_atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v) -#define arch_atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v) - -#define arch_atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v) -#define arch_atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v) -#define arch_atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v) - -#undef ATOMIC_OPS -#undef ATOMIC_FETCH_OP -#undef ATOMIC_OP - -#define ATOMIC64_OP(op, c_op) \ -static __inline__ s64 \ -ia64_atomic64_##op (s64 i, atomic64_t *v) \ -{ \ - s64 old, new; \ - CMPXCHG_BUGCHECK_DECL \ - \ - do { \ - CMPXCHG_BUGCHECK(v); \ - old = arch_atomic64_read(v); \ - new = old c_op i; \ - } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ - return new; \ -} - -#define ATOMIC64_FETCH_OP(op, c_op) \ -static __inline__ s64 \ -ia64_atomic64_fetch_##op (s64 i, atomic64_t *v) \ -{ \ - s64 old, new; \ - CMPXCHG_BUGCHECK_DECL \ - \ - do { \ - CMPXCHG_BUGCHECK(v); \ - old = arch_atomic64_read(v); \ - new = old c_op i; \ - } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ - return old; \ -} - -#define ATOMIC64_OPS(op, c_op) \ - ATOMIC64_OP(op, c_op) \ - ATOMIC64_FETCH_OP(op, c_op) - -ATOMIC64_OPS(add, +) -ATOMIC64_OPS(sub, -) - -#define arch_atomic64_add_return(i,v) \ -({ \ - s64 __ia64_aar_i = (i); \ - __ia64_atomic_const(i) \ - ? 
ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ - : ia64_atomic64_add(__ia64_aar_i, v); \ -}) - -#define arch_atomic64_sub_return(i,v) \ -({ \ - s64 __ia64_asr_i = (i); \ - __ia64_atomic_const(i) \ - ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ - : ia64_atomic64_sub(__ia64_asr_i, v); \ -}) - -#define arch_atomic64_fetch_add(i,v) \ -({ \ - s64 __ia64_aar_i = (i); \ - __ia64_atomic_const(i) \ - ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ - : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ -}) - -#define arch_atomic64_fetch_sub(i,v) \ -({ \ - s64 __ia64_asr_i = (i); \ - __ia64_atomic_const(i) \ - ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ - : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ -}) - -ATOMIC64_FETCH_OP(and, &) -ATOMIC64_FETCH_OP(or, |) -ATOMIC64_FETCH_OP(xor, ^) - -#define arch_atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v) -#define arch_atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v) -#define arch_atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v) - -#define arch_atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v) -#define arch_atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v) -#define arch_atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v) - -#undef ATOMIC64_OPS -#undef ATOMIC64_FETCH_OP -#undef ATOMIC64_OP - -#define arch_atomic_add(i,v) (void)arch_atomic_add_return((i), (v)) -#define arch_atomic_sub(i,v) (void)arch_atomic_sub_return((i), (v)) - -#define arch_atomic64_add(i,v) (void)arch_atomic64_add_return((i), (v)) -#define arch_atomic64_sub(i,v) (void)arch_atomic64_sub_return((i), (v)) - -#endif /* _ASM_IA64_ATOMIC_H */ diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h deleted file mode 100644 index 751cdd353446..000000000000 --- a/arch/ia64/include/asm/barrier.h +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Memory barrier definitions. 
This is based on information published - * in the Processor Abstraction Layer and the System Abstraction Layer - * manual. - * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - */ -#ifndef _ASM_IA64_BARRIER_H -#define _ASM_IA64_BARRIER_H - -#include - -/* - * Macros to force memory ordering. In these descriptions, "previous" - * and "subsequent" refer to program order; "visible" means that all - * architecturally visible effects of a memory access have occurred - * (at a minimum, this means the memory has been read or written). - * - * wmb(): Guarantees that all preceding stores to memory- - * like regions are visible before any subsequent - * stores and that all following stores will be - * visible only after all previous stores. - * rmb(): Like wmb(), but for reads. - * mb(): wmb()/rmb() combo, i.e., all previous memory - * accesses are visible before all subsequent - * accesses and vice versa. This is also known as - * a "fence." - * - * Note: "mb()" and its variants cannot be used as a fence to order - * accesses to memory mapped I/O registers. For that, mf.a needs to - * be used. However, we don't want to always use mf.a because (a) - * it's (presumably) much slower than mf and (b) mf.a is supported for - * sequential memory pages only. - */ -#define mb() ia64_mf() -#define rmb() mb() -#define wmb() mb() - -#define dma_rmb() mb() -#define dma_wmb() mb() - -# define __smp_mb() mb() - -#define __smp_mb__before_atomic() barrier() -#define __smp_mb__after_atomic() barrier() - -/* - * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no - * need for asm trickery! 
- */ - -#define __smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - WRITE_ONCE(*p, v); \ -} while (0) - -#define __smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = READ_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - ___p1; \ -}) - -/* - * The group barrier in front of the rsm & ssm are necessary to ensure - * that none of the previous instructions in the same group are - * affected by the rsm/ssm. - */ - -#include - -#endif /* _ASM_IA64_BARRIER_H */ diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h deleted file mode 100644 index 1accb7842f58..000000000000 --- a/arch/ia64/include/asm/bitops.h +++ /dev/null @@ -1,453 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_BITOPS_H -#define _ASM_IA64_BITOPS_H - -/* - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 - * O(1) scheduler patch - */ - -#ifndef _LINUX_BITOPS_H -#error only can be included directly -#endif - -#include -#include -#include -#include - -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - * - * The address must be (at least) "long" aligned. - * Note that there are driver (e.g., eepro100) which use these operations to - * operate on hw-defined data-structures, so we can't easily change these - * operations to force a bigger alignment. - * - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
- */ -static __inline__ void -set_bit (int nr, volatile void *addr) -{ - __u32 bit, old, new; - volatile __u32 *m; - CMPXCHG_BUGCHECK_DECL - - m = (volatile __u32 *) addr + (nr >> 5); - bit = 1 << (nr & 31); - do { - CMPXCHG_BUGCHECK(m); - old = *m; - new = old | bit; - } while (cmpxchg_acq(m, old, new) != old); -} - -/** - * arch___set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void -arch___set_bit(unsigned long nr, volatile unsigned long *addr) -{ - *((__u32 *) addr + (nr >> 5)) |= (1 << (nr & 31)); -} - -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() - * in order to ensure changes are visible on other processors. - */ -static __inline__ void -clear_bit (int nr, volatile void *addr) -{ - __u32 mask, old, new; - volatile __u32 *m; - CMPXCHG_BUGCHECK_DECL - - m = (volatile __u32 *) addr + (nr >> 5); - mask = ~(1 << (nr & 31)); - do { - CMPXCHG_BUGCHECK(m); - old = *m; - new = old & mask; - } while (cmpxchg_acq(m, old, new) != old); -} - -/** - * clear_bit_unlock - Clears a bit in memory with release - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit_unlock() is atomic and may not be reordered. It does - * contain a memory barrier suitable for unlock type operations. 
- */ -static __inline__ void -clear_bit_unlock (int nr, volatile void *addr) -{ - __u32 mask, old, new; - volatile __u32 *m; - CMPXCHG_BUGCHECK_DECL - - m = (volatile __u32 *) addr + (nr >> 5); - mask = ~(1 << (nr & 31)); - do { - CMPXCHG_BUGCHECK(m); - old = *m; - new = old & mask; - } while (cmpxchg_rel(m, old, new) != old); -} - -/** - * __clear_bit_unlock - Non-atomically clears a bit in memory with release - * @nr: Bit to clear - * @addr: Address to start counting from - * - * Similarly to clear_bit_unlock, the implementation uses a store - * with release semantics. See also arch_spin_unlock(). - */ -static __inline__ void -__clear_bit_unlock(int nr, void *addr) -{ - __u32 * const m = (__u32 *) addr + (nr >> 5); - __u32 const new = *m & ~(1 << (nr & 31)); - - ia64_st4_rel_nta(m, new); -} - -/** - * arch___clear_bit - Clears a bit in memory (non-atomic version) - * @nr: the bit to clear - * @addr: the address to start counting from - * - * Unlike clear_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void -arch___clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - *((__u32 *) addr + (nr >> 5)) &= ~(1 << (nr & 31)); -} - -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to toggle - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. 
- */ -static __inline__ void -change_bit (int nr, volatile void *addr) -{ - __u32 bit, old, new; - volatile __u32 *m; - CMPXCHG_BUGCHECK_DECL - - m = (volatile __u32 *) addr + (nr >> 5); - bit = (1 << (nr & 31)); - do { - CMPXCHG_BUGCHECK(m); - old = *m; - new = old ^ bit; - } while (cmpxchg_acq(m, old, new) != old); -} - -/** - * arch___change_bit - Toggle a bit in memory - * @nr: the bit to toggle - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void -arch___change_bit(unsigned long nr, volatile unsigned long *addr) -{ - *((__u32 *) addr + (nr >> 5)) ^= (1 << (nr & 31)); -} - -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies the acquisition side of the memory barrier. - */ -static __inline__ int -test_and_set_bit (int nr, volatile void *addr) -{ - __u32 bit, old, new; - volatile __u32 *m; - CMPXCHG_BUGCHECK_DECL - - m = (volatile __u32 *) addr + (nr >> 5); - bit = 1 << (nr & 31); - do { - CMPXCHG_BUGCHECK(m); - old = *m; - new = old | bit; - } while (cmpxchg_acq(m, old, new) != old); - return (old & bit) != 0; -} - -/** - * test_and_set_bit_lock - Set a bit and return its old value for lock - * @nr: Bit to set - * @addr: Address to count from - * - * This is the same as test_and_set_bit on ia64 - */ -#define test_and_set_bit_lock test_and_set_bit - -/** - * arch___test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. 
- */ -static __always_inline bool -arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) -{ - __u32 *p = (__u32 *) addr + (nr >> 5); - __u32 m = 1 << (nr & 31); - int oldbitset = (*p & m) != 0; - - *p |= m; - return oldbitset; -} - -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies the acquisition side of the memory barrier. - */ -static __inline__ int -test_and_clear_bit (int nr, volatile void *addr) -{ - __u32 mask, old, new; - volatile __u32 *m; - CMPXCHG_BUGCHECK_DECL - - m = (volatile __u32 *) addr + (nr >> 5); - mask = ~(1 << (nr & 31)); - do { - CMPXCHG_BUGCHECK(m); - old = *m; - new = old & mask; - } while (cmpxchg_acq(m, old, new) != old); - return (old & ~mask) != 0; -} - -/** - * arch___test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static __always_inline bool -arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - __u32 *p = (__u32 *) addr + (nr >> 5); - __u32 m = 1 << (nr & 31); - int oldbitset = (*p & m) != 0; - - *p &= ~m; - return oldbitset; -} - -/** - * test_and_change_bit - Change a bit and return its old value - * @nr: Bit to change - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies the acquisition side of the memory barrier. 
- */ -static __inline__ int -test_and_change_bit (int nr, volatile void *addr) -{ - __u32 bit, old, new; - volatile __u32 *m; - CMPXCHG_BUGCHECK_DECL - - m = (volatile __u32 *) addr + (nr >> 5); - bit = (1 << (nr & 31)); - do { - CMPXCHG_BUGCHECK(m); - old = *m; - new = old ^ bit; - } while (cmpxchg_acq(m, old, new) != old); - return (old & bit) != 0; -} - -/** - * arch___test_and_change_bit - Change a bit and return its old value - * @nr: Bit to change - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - */ -static __always_inline bool -arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) -{ - __u32 old, bit = (1 << (nr & 31)); - __u32 *m = (__u32 *) addr + (nr >> 5); - - old = *m; - *m = old ^ bit; - return (old & bit) != 0; -} - -#define arch_test_bit generic_test_bit -#define arch_test_bit_acquire generic_test_bit_acquire - -/** - * ffz - find the first zero bit in a long word - * @x: The long word to find the bit in - * - * Returns the bit-number (0..63) of the first (least significant) zero bit. - * Undefined if no zero exists, so code should check against ~0UL first... - */ -static inline unsigned long -ffz (unsigned long x) -{ - unsigned long result; - - result = ia64_popcnt(x & (~x - 1)); - return result; -} - -/** - * __ffs - find first bit in word. - * @x: The word to search - * - * Undefined if no bit exists, so code should check against 0 first. - */ -static __inline__ unsigned long -__ffs (unsigned long x) -{ - unsigned long result; - - result = ia64_popcnt((x-1) & ~x); - return result; -} - -#ifdef __KERNEL__ - -/* - * Return bit number of last (most-significant) bit set. Undefined - * for x==0. Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3). - */ -static inline unsigned long -ia64_fls (unsigned long x) -{ - long double d = x; - long exp; - - exp = ia64_getf_exp(d); - return exp - 0xffff; -} - -/* - * Find the last (most significant) bit set. 
Returns 0 for x==0 and - * bits are numbered from 1..32 (e.g., fls(9) == 4). - */ -static inline int fls(unsigned int t) -{ - unsigned long x = t & 0xffffffffu; - - if (!x) - return 0; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - return ia64_popcnt(x); -} - -/* - * Find the last (most significant) bit set. Undefined for x==0. - * Bits are numbered from 0..63 (e.g., __fls(9) == 3). - */ -static inline unsigned long -__fls (unsigned long x) -{ - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x |= x >> 32; - return ia64_popcnt(x) - 1; -} - -#include - -#include - -/* - * hweightN: returns the hamming weight (i.e. the number - * of bits set) of a N-bit word - */ -static __inline__ unsigned long __arch_hweight64(unsigned long x) -{ - unsigned long result; - result = ia64_popcnt(x); - return result; -} - -#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful)) -#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful)) -#define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful)) - -#include - -#endif /* __KERNEL__ */ - -#ifdef __KERNEL__ - -#include - -#include - -#include - -#include - -#endif /* __KERNEL__ */ - -#endif /* _ASM_IA64_BITOPS_H */ diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h deleted file mode 100644 index 66b37a532765..000000000000 --- a/arch/ia64/include/asm/bug.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_BUG_H -#define _ASM_IA64_BUG_H - -#ifdef CONFIG_BUG -#define ia64_abort() __builtin_trap() -#define BUG() do { \ - printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ - barrier_before_unreachable(); \ - ia64_abort(); \ -} while (0) - -/* should this BUG be made generic? 
*/ -#define HAVE_ARCH_BUG -#endif - -#include - -#endif diff --git a/arch/ia64/include/asm/cache.h b/arch/ia64/include/asm/cache.h deleted file mode 100644 index 2f1c70647068..000000000000 --- a/arch/ia64/include/asm/cache.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_CACHE_H -#define _ASM_IA64_CACHE_H - - -/* - * Copyright (C) 1998-2000 Hewlett-Packard Co - * David Mosberger-Tang - */ - -/* Bytes per L1 (data) cache line. */ -#define L1_CACHE_SHIFT CONFIG_IA64_L1_CACHE_SHIFT -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) - -#ifdef CONFIG_SMP -# define SMP_CACHE_SHIFT L1_CACHE_SHIFT -# define SMP_CACHE_BYTES L1_CACHE_BYTES -#else - /* - * The "aligned" directive can only _increase_ alignment, so this is - * safe and provides an easy way to avoid wasting space on a - * uni-processor: - */ -# define SMP_CACHE_SHIFT 3 -# define SMP_CACHE_BYTES (1 << 3) -#endif - -#define __read_mostly __section(".data..read_mostly") - -#endif /* _ASM_IA64_CACHE_H */ diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h deleted file mode 100644 index eac493fa9e0d..000000000000 --- a/arch/ia64/include/asm/cacheflush.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_CACHEFLUSH_H -#define _ASM_IA64_CACHEFLUSH_H - -/* - * Copyright (C) 2002 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include -#include - -#include - -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -static inline void flush_dcache_folio(struct folio *folio) -{ - clear_bit(PG_arch_1, &folio->flags); -} -#define flush_dcache_folio flush_dcache_folio - -static inline void flush_dcache_page(struct page *page) -{ - flush_dcache_folio(page_folio(page)); -} - -extern void flush_icache_range(unsigned long start, unsigned long end); -#define flush_icache_range flush_icache_range -extern void clflush_cache_range(void *addr, int size); - -#define flush_icache_user_page(vma, page, user_addr, len) \ -do { \ - 
unsigned long _addr = (unsigned long) page_address(page) + ((user_addr) & ~PAGE_MASK); \ - flush_icache_range(_addr, _addr + (len)); \ -} while (0) - -#include - -#endif /* _ASM_IA64_CACHEFLUSH_H */ diff --git a/arch/ia64/include/asm/checksum.h b/arch/ia64/include/asm/checksum.h deleted file mode 100644 index f3026213aa32..000000000000 --- a/arch/ia64/include/asm/checksum.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_CHECKSUM_H -#define _ASM_IA64_CHECKSUM_H - -/* - * Modified 1998, 1999 - * David Mosberger-Tang , Hewlett-Packard Co - */ - -/* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. - */ -extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); - -/* - * Computes the checksum of the TCP/UDP pseudo-header returns a 16-bit - * checksum, already complemented - */ -extern __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - __u32 len, __u8 proto, __wsum sum); - -extern __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - __u32 len, __u8 proto, __wsum sum); - -/* - * Computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -extern __wsum csum_partial(const void *buff, int len, __wsum sum); - -/* - * This routine is used for miscellaneous IP-like checksums, mainly in - * icmp.c - */ -extern __sum16 ip_compute_csum(const void *buff, int len); - -/* - * Fold a partial checksum without adding pseudo headers. 
- */ -static inline __sum16 csum_fold(__wsum csum) -{ - u32 sum = (__force u32)csum; - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - return (__force __sum16)~sum; -} - -#define _HAVE_ARCH_IPV6_CSUM 1 -struct in6_addr; -extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr, - const struct in6_addr *daddr, - __u32 len, __u8 proto, __wsum csum); - -#endif /* _ASM_IA64_CHECKSUM_H */ diff --git a/arch/ia64/include/asm/clocksource.h b/arch/ia64/include/asm/clocksource.h deleted file mode 100644 index 71a517751afa..000000000000 --- a/arch/ia64/include/asm/clocksource.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* IA64-specific clocksource additions */ - -#ifndef _ASM_IA64_CLOCKSOURCE_H -#define _ASM_IA64_CLOCKSOURCE_H - -struct arch_clocksource_data { - void *fsys_mmio; /* used by fsyscall asm code */ -}; - -#endif /* _ASM_IA64_CLOCKSOURCE_H */ diff --git a/arch/ia64/include/asm/cmpxchg.h b/arch/ia64/include/asm/cmpxchg.h deleted file mode 100644 index d85ee1a0a227..000000000000 --- a/arch/ia64/include/asm/cmpxchg.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_CMPXCHG_H -#define _ASM_IA64_CMPXCHG_H - -#include - -#define arch_xchg(ptr, x) \ -({(__typeof__(*(ptr))) __arch_xchg((unsigned long) (x), (ptr), sizeof(*(ptr)));}) - -#define arch_cmpxchg(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) -#define arch_cmpxchg64(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) - -#define arch_cmpxchg_local arch_cmpxchg -#define arch_cmpxchg64_local arch_cmpxchg64 - -#ifdef CONFIG_IA64_DEBUG_CMPXCHG -# define CMPXCHG_BUGCHECK_DECL int _cmpxchg_bugcheck_count = 128; -# define CMPXCHG_BUGCHECK(v) \ -do { \ - if (_cmpxchg_bugcheck_count-- <= 0) { \ - void *ip; \ - extern int _printk(const char *fmt, ...); \ - ip = (void *) ia64_getreg(_IA64_REG_IP); \ - _printk("CMPXCHG_BUGCHECK: stuck at %p on word %p\n", ip, (v));\ - break; \ - } \ -} while (0) -#else /* !CONFIG_IA64_DEBUG_CMPXCHG */ -# define 
CMPXCHG_BUGCHECK_DECL -# define CMPXCHG_BUGCHECK(v) -#endif /* !CONFIG_IA64_DEBUG_CMPXCHG */ - -#endif /* _ASM_IA64_CMPXCHG_H */ diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h deleted file mode 100644 index db125df9e088..000000000000 --- a/arch/ia64/include/asm/cpu.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_CPU_H_ -#define _ASM_IA64_CPU_H_ - -#include -#include -#include -#include - -struct ia64_cpu { - struct cpu cpu; -}; - -DECLARE_PER_CPU(struct ia64_cpu, cpu_devices); - -DECLARE_PER_CPU(int, cpu_state); - -#ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); -#endif - -#endif /* _ASM_IA64_CPU_H_ */ diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h deleted file mode 100644 index 7f28c3564d5d..000000000000 --- a/arch/ia64/include/asm/cputime.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Definitions for measuring cputime on ia64 machines. - * - * Based on . - * - * Copyright (C) 2007 FUJITSU LIMITED - * Copyright (C) 2007 Hidetoshi Seto - * - * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec. - * Otherwise we measure cpu time in jiffies using the generic definitions. 
- */ - -#ifndef __IA64_CPUTIME_H -#define __IA64_CPUTIME_H - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -extern void arch_vtime_task_switch(struct task_struct *tsk); -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - -#endif /* __IA64_CPUTIME_H */ diff --git a/arch/ia64/include/asm/current.h b/arch/ia64/include/asm/current.h deleted file mode 100644 index 86fbcc88dff2..000000000000 --- a/arch/ia64/include/asm/current.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_CURRENT_H -#define _ASM_IA64_CURRENT_H - -/* - * Modified 1998-2000 - * David Mosberger-Tang , Hewlett-Packard Co - */ - -#include - -/* - * In kernel mode, thread pointer (r13) is used to point to the current task - * structure. - */ -#define current ((struct task_struct *) ia64_getreg(_IA64_REG_TP)) - -#endif /* _ASM_IA64_CURRENT_H */ diff --git a/arch/ia64/include/asm/cyclone.h b/arch/ia64/include/asm/cyclone.h deleted file mode 100644 index a481393647e9..000000000000 --- a/arch/ia64/include/asm/cyclone.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_IA64_CYCLONE_H -#define ASM_IA64_CYCLONE_H - -#ifdef CONFIG_IA64_CYCLONE -extern int use_cyclone; -extern void __init cyclone_setup(void); -#else /* CONFIG_IA64_CYCLONE */ -#define use_cyclone 0 -static inline void cyclone_setup(void) -{ - printk(KERN_ERR "Cyclone Counter: System not configured" - " w/ CONFIG_IA64_CYCLONE.\n"); -} -#endif /* CONFIG_IA64_CYCLONE */ -#endif /* !ASM_IA64_CYCLONE_H */ diff --git a/arch/ia64/include/asm/delay.h b/arch/ia64/include/asm/delay.h deleted file mode 100644 index 0227ac586107..000000000000 --- a/arch/ia64/include/asm/delay.h +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_DELAY_H -#define _ASM_IA64_DELAY_H - -/* - * Delay routines using a pre-computed "cycles/usec" value. 
- * - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - */ - -#include -#include -#include - -#include -#include - -static __inline__ void -ia64_set_itm (unsigned long val) -{ - ia64_setreg(_IA64_REG_CR_ITM, val); - ia64_srlz_d(); -} - -static __inline__ unsigned long -ia64_get_itm (void) -{ - unsigned long result; - - result = ia64_getreg(_IA64_REG_CR_ITM); - ia64_srlz_d(); - return result; -} - -static __inline__ void -ia64_set_itv (unsigned long val) -{ - ia64_setreg(_IA64_REG_CR_ITV, val); - ia64_srlz_d(); -} - -static __inline__ unsigned long -ia64_get_itv (void) -{ - return ia64_getreg(_IA64_REG_CR_ITV); -} - -static __inline__ void -ia64_set_itc (unsigned long val) -{ - ia64_setreg(_IA64_REG_AR_ITC, val); - ia64_srlz_d(); -} - -static __inline__ unsigned long -ia64_get_itc (void) -{ - unsigned long result; - - result = ia64_getreg(_IA64_REG_AR_ITC); - ia64_barrier(); -#ifdef CONFIG_ITANIUM - while (unlikely((__s32) result == -1)) { - result = ia64_getreg(_IA64_REG_AR_ITC); - ia64_barrier(); - } -#endif - return result; -} - -extern void ia64_delay_loop (unsigned long loops); - -static __inline__ void -__delay (unsigned long loops) -{ - if (unlikely(loops < 1)) - return; - - ia64_delay_loop (loops - 1); -} - -extern void udelay (unsigned long usecs); - -#endif /* _ASM_IA64_DELAY_H */ diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h deleted file mode 100644 index 918b198cd5bb..000000000000 --- a/arch/ia64/include/asm/device.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Arch specific extensions to struct device - */ -#ifndef _ASM_IA64_DEVICE_H -#define _ASM_IA64_DEVICE_H - -struct dev_archdata { -}; - -struct pdev_archdata { -}; - -#endif /* _ASM_IA64_DEVICE_H */ diff --git a/arch/ia64/include/asm/div64.h 
b/arch/ia64/include/asm/div64.h deleted file mode 100644 index 6cd978cefb28..000000000000 --- a/arch/ia64/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h deleted file mode 100644 index af6fa8e1597c..000000000000 --- a/arch/ia64/include/asm/dma-mapping.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_DMA_MAPPING_H -#define _ASM_IA64_DMA_MAPPING_H - -/* - * Copyright (C) 2003-2004 Hewlett-Packard Co - * David Mosberger-Tang - */ -extern const struct dma_map_ops *dma_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(void) -{ - return dma_ops; -} - -#endif /* _ASM_IA64_DMA_MAPPING_H */ diff --git a/arch/ia64/include/asm/dma.h b/arch/ia64/include/asm/dma.h deleted file mode 100644 index eaed2626ffda..000000000000 --- a/arch/ia64/include/asm/dma.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_DMA_H -#define _ASM_IA64_DMA_H - -/* - * Copyright (C) 1998-2002 Hewlett-Packard Co - * David Mosberger-Tang - */ - - -#include /* need byte IO */ - -extern unsigned long MAX_DMA_ADDRESS; - -#define free_dma(x) - -#endif /* _ASM_IA64_DMA_H */ diff --git a/arch/ia64/include/asm/dmi.h b/arch/ia64/include/asm/dmi.h deleted file mode 100644 index ecd9e0a0f5f9..000000000000 --- a/arch/ia64/include/asm/dmi.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_DMI_H -#define _ASM_DMI_H 1 - -#include -#include - -/* Use normal IO mappings for DMI */ -#define dmi_early_remap ioremap -#define dmi_early_unmap(x, l) iounmap(x) -#define dmi_remap ioremap -#define dmi_unmap iounmap -#define dmi_alloc(l) kzalloc(l, GFP_ATOMIC) - -#endif diff --git a/arch/ia64/include/asm/early_ioremap.h b/arch/ia64/include/asm/early_ioremap.h deleted file mode 100644 index 934191b1e2e3..000000000000 --- a/arch/ia64/include/asm/early_ioremap.h +++ /dev/null @@ -1,11 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_EARLY_IOREMAP_H -#define _ASM_IA64_EARLY_IOREMAP_H - -extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size); -#define early_memremap(phys_addr, size) early_ioremap(phys_addr, size) - -extern void early_iounmap (volatile void __iomem *addr, unsigned long size); -#define early_memunmap(addr, size) early_iounmap(addr, size) - -#endif diff --git a/arch/ia64/include/asm/efi.h b/arch/ia64/include/asm/efi.h deleted file mode 100644 index 6a4a50d8f19a..000000000000 --- a/arch/ia64/include/asm/efi.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_EFI_H -#define _ASM_EFI_H - -typedef int (*efi_freemem_callback_t) (u64 start, u64 end, void *arg); - -void *efi_get_pal_addr(void); -void efi_map_pal_code(void); -void efi_memmap_walk(efi_freemem_callback_t, void *); -void efi_memmap_walk_uc(efi_freemem_callback_t, void *); -void efi_gettimeofday(struct timespec64 *ts); - -#endif diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h deleted file mode 100644 index 2ef5f9966ad1..000000000000 --- a/arch/ia64/include/asm/elf.h +++ /dev/null @@ -1,233 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_ELF_H -#define _ASM_IA64_ELF_H - -/* - * ELF-specific definitions. - * - * Copyright (C) 1998-1999, 2002-2004 Hewlett-Packard Co - * David Mosberger-Tang - */ - - -#include -#include -#include - -/* - * This is used to ensure we don't load something for the wrong architecture. - */ -#define elf_check_arch(x) ((x)->e_machine == EM_IA_64) - -/* - * These are used to set parameters in the core dumps. - */ -#define ELF_CLASS ELFCLASS64 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_IA_64 - -#define CORE_DUMP_USE_REGSET - -/* Least-significant four bits of ELF header's e_flags are OS-specific. The bits are - interpreted as follows by Linux: */ -#define EF_IA_64_LINUX_EXECUTABLE_STACK 0x1 /* is stack (& heap) executable by default? 
*/ - -#define ELF_EXEC_PAGESIZE PAGE_SIZE - -/* - * This is the location that an ET_DYN program is loaded if exec'ed. - * Typical use of this is to invoke "./ld.so someprog" to test out a - * new version of the loader. We need to make sure that it is out of - * the way of the program that it will "exec", and that there is - * sufficient room for the brk. - */ -#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x800000000UL) - -#define PT_IA_64_UNWIND 0x70000001 - -/* IA-64 relocations: */ -#define R_IA64_NONE 0x00 /* none */ -#define R_IA64_IMM14 0x21 /* symbol + addend, add imm14 */ -#define R_IA64_IMM22 0x22 /* symbol + addend, add imm22 */ -#define R_IA64_IMM64 0x23 /* symbol + addend, mov imm64 */ -#define R_IA64_DIR32MSB 0x24 /* symbol + addend, data4 MSB */ -#define R_IA64_DIR32LSB 0x25 /* symbol + addend, data4 LSB */ -#define R_IA64_DIR64MSB 0x26 /* symbol + addend, data8 MSB */ -#define R_IA64_DIR64LSB 0x27 /* symbol + addend, data8 LSB */ -#define R_IA64_GPREL22 0x2a /* @gprel(sym+add), add imm22 */ -#define R_IA64_GPREL64I 0x2b /* @gprel(sym+add), mov imm64 */ -#define R_IA64_GPREL32MSB 0x2c /* @gprel(sym+add), data4 MSB */ -#define R_IA64_GPREL32LSB 0x2d /* @gprel(sym+add), data4 LSB */ -#define R_IA64_GPREL64MSB 0x2e /* @gprel(sym+add), data8 MSB */ -#define R_IA64_GPREL64LSB 0x2f /* @gprel(sym+add), data8 LSB */ -#define R_IA64_LTOFF22 0x32 /* @ltoff(sym+add), add imm22 */ -#define R_IA64_LTOFF64I 0x33 /* @ltoff(sym+add), mov imm64 */ -#define R_IA64_PLTOFF22 0x3a /* @pltoff(sym+add), add imm22 */ -#define R_IA64_PLTOFF64I 0x3b /* @pltoff(sym+add), mov imm64 */ -#define R_IA64_PLTOFF64MSB 0x3e /* @pltoff(sym+add), data8 MSB */ -#define R_IA64_PLTOFF64LSB 0x3f /* @pltoff(sym+add), data8 LSB */ -#define R_IA64_FPTR64I 0x43 /* @fptr(sym+add), mov imm64 */ -#define R_IA64_FPTR32MSB 0x44 /* @fptr(sym+add), data4 MSB */ -#define R_IA64_FPTR32LSB 0x45 /* @fptr(sym+add), data4 LSB */ -#define R_IA64_FPTR64MSB 0x46 /* @fptr(sym+add), data8 MSB */ -#define 
R_IA64_FPTR64LSB 0x47 /* @fptr(sym+add), data8 LSB */ -#define R_IA64_PCREL60B 0x48 /* @pcrel(sym+add), brl */ -#define R_IA64_PCREL21B 0x49 /* @pcrel(sym+add), ptb, call */ -#define R_IA64_PCREL21M 0x4a /* @pcrel(sym+add), chk.s */ -#define R_IA64_PCREL21F 0x4b /* @pcrel(sym+add), fchkf */ -#define R_IA64_PCREL32MSB 0x4c /* @pcrel(sym+add), data4 MSB */ -#define R_IA64_PCREL32LSB 0x4d /* @pcrel(sym+add), data4 LSB */ -#define R_IA64_PCREL64MSB 0x4e /* @pcrel(sym+add), data8 MSB */ -#define R_IA64_PCREL64LSB 0x4f /* @pcrel(sym+add), data8 LSB */ -#define R_IA64_LTOFF_FPTR22 0x52 /* @ltoff(@fptr(s+a)), imm22 */ -#define R_IA64_LTOFF_FPTR64I 0x53 /* @ltoff(@fptr(s+a)), imm64 */ -#define R_IA64_LTOFF_FPTR32MSB 0x54 /* @ltoff(@fptr(s+a)), 4 MSB */ -#define R_IA64_LTOFF_FPTR32LSB 0x55 /* @ltoff(@fptr(s+a)), 4 LSB */ -#define R_IA64_LTOFF_FPTR64MSB 0x56 /* @ltoff(@fptr(s+a)), 8 MSB */ -#define R_IA64_LTOFF_FPTR64LSB 0x57 /* @ltoff(@fptr(s+a)), 8 LSB */ -#define R_IA64_SEGREL32MSB 0x5c /* @segrel(sym+add), data4 MSB */ -#define R_IA64_SEGREL32LSB 0x5d /* @segrel(sym+add), data4 LSB */ -#define R_IA64_SEGREL64MSB 0x5e /* @segrel(sym+add), data8 MSB */ -#define R_IA64_SEGREL64LSB 0x5f /* @segrel(sym+add), data8 LSB */ -#define R_IA64_SECREL32MSB 0x64 /* @secrel(sym+add), data4 MSB */ -#define R_IA64_SECREL32LSB 0x65 /* @secrel(sym+add), data4 LSB */ -#define R_IA64_SECREL64MSB 0x66 /* @secrel(sym+add), data8 MSB */ -#define R_IA64_SECREL64LSB 0x67 /* @secrel(sym+add), data8 LSB */ -#define R_IA64_REL32MSB 0x6c /* data 4 + REL */ -#define R_IA64_REL32LSB 0x6d /* data 4 + REL */ -#define R_IA64_REL64MSB 0x6e /* data 8 + REL */ -#define R_IA64_REL64LSB 0x6f /* data 8 + REL */ -#define R_IA64_LTV32MSB 0x74 /* symbol + addend, data4 MSB */ -#define R_IA64_LTV32LSB 0x75 /* symbol + addend, data4 LSB */ -#define R_IA64_LTV64MSB 0x76 /* symbol + addend, data8 MSB */ -#define R_IA64_LTV64LSB 0x77 /* symbol + addend, data8 LSB */ -#define R_IA64_PCREL21BI 0x79 /* @pcrel(sym+add), 
ptb, call */ -#define R_IA64_PCREL22 0x7a /* @pcrel(sym+add), imm22 */ -#define R_IA64_PCREL64I 0x7b /* @pcrel(sym+add), imm64 */ -#define R_IA64_IPLTMSB 0x80 /* dynamic reloc, imported PLT, MSB */ -#define R_IA64_IPLTLSB 0x81 /* dynamic reloc, imported PLT, LSB */ -#define R_IA64_COPY 0x84 /* dynamic reloc, data copy */ -#define R_IA64_SUB 0x85 /* -symbol + addend, add imm22 */ -#define R_IA64_LTOFF22X 0x86 /* LTOFF22, relaxable. */ -#define R_IA64_LDXMOV 0x87 /* Use of LTOFF22X. */ -#define R_IA64_TPREL14 0x91 /* @tprel(sym+add), add imm14 */ -#define R_IA64_TPREL22 0x92 /* @tprel(sym+add), add imm22 */ -#define R_IA64_TPREL64I 0x93 /* @tprel(sym+add), add imm64 */ -#define R_IA64_TPREL64MSB 0x96 /* @tprel(sym+add), data8 MSB */ -#define R_IA64_TPREL64LSB 0x97 /* @tprel(sym+add), data8 LSB */ -#define R_IA64_LTOFF_TPREL22 0x9a /* @ltoff(@tprel(s+a)), add imm22 */ -#define R_IA64_DTPMOD64MSB 0xa6 /* @dtpmod(sym+add), data8 MSB */ -#define R_IA64_DTPMOD64LSB 0xa7 /* @dtpmod(sym+add), data8 LSB */ -#define R_IA64_LTOFF_DTPMOD22 0xaa /* @ltoff(@dtpmod(s+a)), imm22 */ -#define R_IA64_DTPREL14 0xb1 /* @dtprel(sym+add), imm14 */ -#define R_IA64_DTPREL22 0xb2 /* @dtprel(sym+add), imm22 */ -#define R_IA64_DTPREL64I 0xb3 /* @dtprel(sym+add), imm64 */ -#define R_IA64_DTPREL32MSB 0xb4 /* @dtprel(sym+add), data4 MSB */ -#define R_IA64_DTPREL32LSB 0xb5 /* @dtprel(sym+add), data4 LSB */ -#define R_IA64_DTPREL64MSB 0xb6 /* @dtprel(sym+add), data8 MSB */ -#define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym+add), data8 LSB */ -#define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */ - -/* IA-64 specific section flags: */ -#define SHF_IA_64_SHORT 0x10000000 /* section near gp */ - -/* - * We use (abuse?) this macro to insert the (empty) vm_area that is - * used to map the register backing store. I don't see any better - * place to do this, but we should discuss this with Linus once we can - * talk to him... 
- */ -extern void ia64_init_addr_space (void); -#define ELF_PLAT_INIT(_r, load_addr) ia64_init_addr_space() - -/* ELF register definitions. This is needed for core dump support. */ - -/* - * elf_gregset_t contains the application-level state in the following order: - * r0-r31 - * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) - * predicate registers (p0-p63) - * b0-b7 - * ip cfm psr - * ar.rsc ar.bsp ar.bspstore ar.rnat - * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd - */ -#define ELF_NGREG 128 /* we really need just 72 but let's leave some headroom... */ -#define ELF_NFPREG 128 /* f0 and f1 could be omitted, but so what... */ - -/* elf_gregset_t register offsets */ -#define ELF_GR_0_OFFSET 0 -#define ELF_NAT_OFFSET (32 * sizeof(elf_greg_t)) -#define ELF_PR_OFFSET (33 * sizeof(elf_greg_t)) -#define ELF_BR_0_OFFSET (34 * sizeof(elf_greg_t)) -#define ELF_CR_IIP_OFFSET (42 * sizeof(elf_greg_t)) -#define ELF_CFM_OFFSET (43 * sizeof(elf_greg_t)) -#define ELF_CR_IPSR_OFFSET (44 * sizeof(elf_greg_t)) -#define ELF_GR_OFFSET(i) (ELF_GR_0_OFFSET + i * sizeof(elf_greg_t)) -#define ELF_BR_OFFSET(i) (ELF_BR_0_OFFSET + i * sizeof(elf_greg_t)) -#define ELF_AR_RSC_OFFSET (45 * sizeof(elf_greg_t)) -#define ELF_AR_BSP_OFFSET (46 * sizeof(elf_greg_t)) -#define ELF_AR_BSPSTORE_OFFSET (47 * sizeof(elf_greg_t)) -#define ELF_AR_RNAT_OFFSET (48 * sizeof(elf_greg_t)) -#define ELF_AR_CCV_OFFSET (49 * sizeof(elf_greg_t)) -#define ELF_AR_UNAT_OFFSET (50 * sizeof(elf_greg_t)) -#define ELF_AR_FPSR_OFFSET (51 * sizeof(elf_greg_t)) -#define ELF_AR_PFS_OFFSET (52 * sizeof(elf_greg_t)) -#define ELF_AR_LC_OFFSET (53 * sizeof(elf_greg_t)) -#define ELF_AR_EC_OFFSET (54 * sizeof(elf_greg_t)) -#define ELF_AR_CSD_OFFSET (55 * sizeof(elf_greg_t)) -#define ELF_AR_SSD_OFFSET (56 * sizeof(elf_greg_t)) -#define ELF_AR_END_OFFSET (57 * sizeof(elf_greg_t)) - -typedef unsigned long elf_greg_t; -typedef elf_greg_t elf_gregset_t[ELF_NGREG]; - -typedef struct ia64_fpreg elf_fpreg_t; -typedef 
elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; - - - -struct pt_regs; /* forward declaration... */ -extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst); -#define ELF_CORE_COPY_REGS(_dest,_regs) ia64_elf_core_copy_regs(_regs, _dest); - -/* This macro yields a bitmask that programs can use to figure out - what instruction set this CPU supports. */ -#define ELF_HWCAP 0 - -/* This macro yields a string that ld.so will use to load - implementation specific libraries for optimization. Not terribly - relevant until we have real hardware to play with... */ -#define ELF_PLATFORM NULL - -#define elf_read_implies_exec(ex, executable_stack) \ - ((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0) - -struct task_struct; - -#define GATE_EHDR ((const struct elfhdr *) GATE_ADDR) - -/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ -#define ARCH_DLINFO \ -do { \ - extern char __kernel_syscall_via_epc[]; \ - NEW_AUX_ENT(AT_SYSINFO, (unsigned long) __kernel_syscall_via_epc); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR); \ -} while (0) - -/* - * format for entries in the Global Offset Table - */ -struct got_entry { - uint64_t val; -}; - -/* - * Layout of the Function Descriptor - */ -struct fdesc { - uint64_t addr; - uint64_t gp; -}; - -#endif /* _ASM_IA64_ELF_H */ diff --git a/arch/ia64/include/asm/emergency-restart.h b/arch/ia64/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/arch/ia64/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/ia64/include/asm/esi.h b/arch/ia64/include/asm/esi.h deleted file mode 100644 index 56d1310af06e..000000000000 --- a/arch/ia64/include/asm/esi.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ESI service calls. 
- * - * Copyright (c) Copyright 2005-2006 Hewlett-Packard Development Company, L.P. - * Alex Williamson - */ -#ifndef esi_h -#define esi_h - -#include - -#define ESI_QUERY 0x00000001 -#define ESI_OPEN_HANDLE 0x02000000 -#define ESI_CLOSE_HANDLE 0x02000001 - -enum esi_proc_type { - ESI_PROC_SERIALIZED, /* calls need to be serialized */ - ESI_PROC_MP_SAFE, /* MP-safe, but not reentrant */ - ESI_PROC_REENTRANT /* MP-safe and reentrant */ -}; - -extern struct ia64_sal_retval esi_call_phys (void *, u64 *); -extern int ia64_esi_call(efi_guid_t, struct ia64_sal_retval *, - enum esi_proc_type, - u64, u64, u64, u64, u64, u64, u64, u64); -extern int ia64_esi_call_phys(efi_guid_t, struct ia64_sal_retval *, u64, u64, - u64, u64, u64, u64, u64, u64); - -#endif /* esi_h */ diff --git a/arch/ia64/include/asm/exception.h b/arch/ia64/include/asm/exception.h deleted file mode 100644 index 1d5df8116a31..000000000000 --- a/arch/ia64/include/asm/exception.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef __ASM_EXCEPTION_H -#define __ASM_EXCEPTION_H - -struct pt_regs; -struct exception_table_entry; - -extern void ia64_handle_exception(struct pt_regs *regs, - const struct exception_table_entry *e); - -#define ia64_done_with_exception(regs) \ -({ \ - int __ex_ret = 0; \ - const struct exception_table_entry *e; \ - e = search_exception_tables((regs)->cr_iip + ia64_psr(regs)->ri); \ - if (e) { \ - ia64_handle_exception(regs, e); \ - __ex_ret = 1; \ - } \ - __ex_ret; \ -}) - -#endif /* __ASM_EXCEPTION_H */ diff --git a/arch/ia64/include/asm/extable.h b/arch/ia64/include/asm/extable.h deleted file mode 100644 index 83eac6aa0639..000000000000 --- a/arch/ia64/include/asm/extable.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_EXTABLE_H -#define _ASM_IA64_EXTABLE_H - -#define ARCH_HAS_RELATIVE_EXTABLE - -struct exception_table_entry { - int insn; /* location-relative address of insn this fixup is for */ - int fixup; 
/* location-relative continuation addr.; if bit 2 is set, r9 is set to 0 */ -}; - -#endif diff --git a/arch/ia64/include/asm/fb.h b/arch/ia64/include/asm/fb.h deleted file mode 100644 index 1717b26fd423..000000000000 --- a/arch/ia64/include/asm/fb.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ - -#include -#include -#include - -#include - -struct file; - -static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, - unsigned long off) -{ - if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start)) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - else - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -} -#define fb_pgprotect fb_pgprotect - -static inline void fb_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) -{ - memcpy(to, (void __force *)from, n); -} -#define fb_memcpy_fromio fb_memcpy_fromio - -static inline void fb_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) -{ - memcpy((void __force *)to, from, n); -} -#define fb_memcpy_toio fb_memcpy_toio - -static inline void fb_memset_io(volatile void __iomem *addr, int c, size_t n) -{ - memset((void __force *)addr, c, n); -} -#define fb_memset fb_memset_io - -#include - -#endif /* _ASM_FB_H_ */ diff --git a/arch/ia64/include/asm/fpswa.h b/arch/ia64/include/asm/fpswa.h deleted file mode 100644 index 2a0c23728b26..000000000000 --- a/arch/ia64/include/asm/fpswa.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_FPSWA_H -#define _ASM_IA64_FPSWA_H - -/* - * Floating-point Software Assist - * - * Copyright (C) 1999 Intel Corporation. 
- * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Goutham Rao - */ - -typedef struct { - /* 4 * 128 bits */ - unsigned long fp_lp[4*2]; -} fp_state_low_preserved_t; - -typedef struct { - /* 10 * 128 bits */ - unsigned long fp_lv[10 * 2]; -} fp_state_low_volatile_t; - -typedef struct { - /* 16 * 128 bits */ - unsigned long fp_hp[16 * 2]; -} fp_state_high_preserved_t; - -typedef struct { - /* 96 * 128 bits */ - unsigned long fp_hv[96 * 2]; -} fp_state_high_volatile_t; - -/** - * floating point state to be passed to the FP emulation library by - * the trap/fault handler - */ -typedef struct { - unsigned long bitmask_low64; - unsigned long bitmask_high64; - fp_state_low_preserved_t *fp_state_low_preserved; - fp_state_low_volatile_t *fp_state_low_volatile; - fp_state_high_preserved_t *fp_state_high_preserved; - fp_state_high_volatile_t *fp_state_high_volatile; -} fp_state_t; - -typedef struct { - unsigned long status; - unsigned long err0; - unsigned long err1; - unsigned long err2; -} fpswa_ret_t; - -/** - * function header for the Floating Point software assist - * library. This function is invoked by the Floating point software - * assist trap/fault handler. - */ -typedef fpswa_ret_t (*efi_fpswa_t) (unsigned long trap_type, void *bundle, unsigned long *ipsr, - unsigned long *fsr, unsigned long *isr, unsigned long *preds, - unsigned long *ifs, fp_state_t *fp_state); - -/** - * This is the FPSWA library interface as defined by EFI. 
We need to pass a - * pointer to the interface itself on a call to the assist library - */ -typedef struct { - unsigned int revision; - unsigned int reserved; - efi_fpswa_t fpswa; -} fpswa_interface_t; - -extern fpswa_interface_t *fpswa_interface; - -#endif /* _ASM_IA64_FPSWA_H */ diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h deleted file mode 100644 index a07a8e575453..000000000000 --- a/arch/ia64/include/asm/ftrace.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_FTRACE_H -#define _ASM_IA64_FTRACE_H - -#ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_INSN_SIZE 32 /* sizeof mcount call */ - -#ifndef __ASSEMBLY__ -extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0); -#define mcount _mcount - -/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */ -#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip) -#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip) - -static inline unsigned long ftrace_call_adjust(unsigned long addr) -{ - /* second bundle, insn 2 */ - return addr - 0x12; -} - -struct dyn_arch_ftrace { -}; -#endif - -#endif /* CONFIG_FUNCTION_TRACER */ - -#endif /* _ASM_IA64_FTRACE_H */ diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h deleted file mode 100644 index 1db26b432d8c..000000000000 --- a/arch/ia64/include/asm/futex.h +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_FUTEX_H -#define _ASM_FUTEX_H - -#include -#include -#include - -#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ -do { \ - register unsigned long r8 __asm ("r8") = 0; \ - __asm__ __volatile__( \ - " mf;; \n" \ - "[1:] " insn ";; \n" \ - " .xdata4 \"__ex_table\", 1b-., 2f-. 
\n" \ - "[2:]" \ - : "+r" (r8), "=r" (oldval) \ - : "r" (uaddr), "r" (oparg) \ - : "memory"); \ - ret = r8; \ -} while (0) - -#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ -do { \ - register unsigned long r8 __asm ("r8") = 0; \ - int val, newval; \ - do { \ - __asm__ __volatile__( \ - " mf;; \n" \ - "[1:] ld4 %3=[%4];; \n" \ - " mov %2=%3 \n" \ - insn ";; \n" \ - " mov ar.ccv=%2;; \n" \ - "[2:] cmpxchg4.acq %1=[%4],%3,ar.ccv;; \n" \ - " .xdata4 \"__ex_table\", 1b-., 3f-.\n" \ - " .xdata4 \"__ex_table\", 2b-., 3f-.\n" \ - "[3:]" \ - : "+r" (r8), "=r" (val), "=&r" (oldval), \ - "=&r" (newval) \ - : "r" (uaddr), "r" (oparg) \ - : "memory"); \ - if (unlikely (r8)) \ - break; \ - } while (unlikely (val != oldval)); \ - ret = r8; \ -} while (0) - -static inline int -arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) -{ - int oldval = 0, ret; - - if (!access_ok(uaddr, sizeof(u32))) - return -EFAULT; - - switch (op) { - case FUTEX_OP_SET: - __futex_atomic_op1("xchg4 %1=[%2],%3", ret, oldval, uaddr, - oparg); - break; - case FUTEX_OP_ADD: - __futex_atomic_op2("add %3=%3,%5", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_OR: - __futex_atomic_op2("or %3=%3,%5", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_ANDN: - __futex_atomic_op2("and %3=%3,%5", ret, oldval, uaddr, - ~oparg); - break; - case FUTEX_OP_XOR: - __futex_atomic_op2("xor %3=%3,%5", ret, oldval, uaddr, oparg); - break; - default: - ret = -ENOSYS; - } - - if (!ret) - *oval = oldval; - - return ret; -} - -static inline int -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) -{ - if (!access_ok(uaddr, sizeof(u32))) - return -EFAULT; - - { - register unsigned long r8 __asm ("r8") = 0; - unsigned long prev; - __asm__ __volatile__( - " mf;; \n" - " mov ar.ccv=%4;; \n" - "[1:] cmpxchg4.acq %1=[%2],%3,ar.ccv \n" - " .xdata4 \"__ex_table\", 1b-., 2f-. 
\n" - "[2:]" - : "+r" (r8), "=&r" (prev) - : "r" (uaddr), "r" (newval), - "rO" ((long) (unsigned) oldval) - : "memory"); - *uval = prev; - return r8; - } -} - -#endif /* _ASM_FUTEX_H */ diff --git a/arch/ia64/include/asm/gcc_intrin.h b/arch/ia64/include/asm/gcc_intrin.h deleted file mode 100644 index 83f230b23867..000000000000 --- a/arch/ia64/include/asm/gcc_intrin.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Copyright (C) 2002,2003 Jun Nakajima - * Copyright (C) 2002,2003 Suresh Siddha - */ -#ifndef _ASM_IA64_GCC_INTRIN_H -#define _ASM_IA64_GCC_INTRIN_H - -#include - -register unsigned long ia64_r13 asm ("r13") __used; -#endif /* _ASM_IA64_GCC_INTRIN_H */ diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h deleted file mode 100644 index ccde7c2ba00f..000000000000 --- a/arch/ia64/include/asm/hardirq.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_HARDIRQ_H -#define _ASM_IA64_HARDIRQ_H - -/* - * Modified 1998-2002, 2004 Hewlett-Packard Co - * David Mosberger-Tang - */ - -/* - * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. 
- */ - -#define __ARCH_IRQ_STAT 1 - -#define local_softirq_pending_ref ia64_cpu_info.softirq_pending - -#include -#include - -#include - -extern void __iomem *ipi_base_addr; - -void ack_bad_irq(unsigned int irq); - -#endif /* _ASM_IA64_HARDIRQ_H */ diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h deleted file mode 100644 index 026ead47cd53..000000000000 --- a/arch/ia64/include/asm/hugetlb.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_HUGETLB_H -#define _ASM_IA64_HUGETLB_H - -#include - -#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE -void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, - unsigned long end, unsigned long floor, - unsigned long ceiling); - -#define __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE -int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len); - -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) -{ - return (REGION_NUMBER(addr) == RGN_HPAGE || - REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE); -} -#define is_hugepage_only_range is_hugepage_only_range - -#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH -static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - return *ptep; -} - -#include - -#endif /* _ASM_IA64_HUGETLB_H */ diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h deleted file mode 100644 index 5d267132f8cb..000000000000 --- a/arch/ia64/include/asm/hw_irq.h +++ /dev/null @@ -1,167 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_HW_IRQ_H -#define _ASM_IA64_HW_IRQ_H - -/* - * Copyright (C) 2001-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include -#include -#include -#include - -#include -#include - -typedef u8 ia64_vector; - -/* - * 0 special - * - * 1,3-14 are reserved from firmware - * - * 16-255 (vectored external interrupts) are available - * - * 15 spurious interrupt 
(see IVR) - * - * 16 lowest priority, 255 highest priority - * - * 15 classes of 16 interrupts each. - */ -#define IA64_MIN_VECTORED_IRQ 16 -#define IA64_MAX_VECTORED_IRQ 255 -#define IA64_NUM_VECTORS 256 - -#define AUTO_ASSIGN -1 - -#define IA64_SPURIOUS_INT_VECTOR 0x0f - -/* - * Vectors 0x10-0x1f are used for low priority interrupts, e.g. CMCI. - */ -#define IA64_CPEP_VECTOR 0x1c /* corrected platform error polling vector */ -#define IA64_CMCP_VECTOR 0x1d /* corrected machine-check polling vector */ -#define IA64_CPE_VECTOR 0x1e /* corrected platform error interrupt vector */ -#define IA64_CMC_VECTOR 0x1f /* corrected machine-check interrupt vector */ -/* - * Vectors 0x20-0x2f are reserved for legacy ISA IRQs. - * Use vectors 0x30-0xe7 as the default device vector range for ia64. - * Platforms may choose to reduce this range in platform_irq_setup, but the - * platform range must fall within - * [IA64_DEF_FIRST_DEVICE_VECTOR..IA64_DEF_LAST_DEVICE_VECTOR] - */ -extern int ia64_first_device_vector; -extern int ia64_last_device_vector; - -#ifdef CONFIG_SMP -/* Reserve the lower priority vector than device vectors for "move IRQ" IPI */ -#define IA64_IRQ_MOVE_VECTOR 0x30 /* "move IRQ" IPI */ -#define IA64_DEF_FIRST_DEVICE_VECTOR 0x31 -#else -#define IA64_DEF_FIRST_DEVICE_VECTOR 0x30 -#endif -#define IA64_DEF_LAST_DEVICE_VECTOR 0xe7 -#define IA64_FIRST_DEVICE_VECTOR ia64_first_device_vector -#define IA64_LAST_DEVICE_VECTOR ia64_last_device_vector -#define IA64_MAX_DEVICE_VECTORS (IA64_DEF_LAST_DEVICE_VECTOR - IA64_DEF_FIRST_DEVICE_VECTOR + 1) -#define IA64_NUM_DEVICE_VECTORS (IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1) - -#define IA64_MCA_RENDEZ_VECTOR 0xe8 /* MCA rendez interrupt */ -#define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */ -#define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */ -#define IA64_IPI_LOCAL_TLB_FLUSH 0xfc /* SMP flush local TLB */ -#define IA64_IPI_RESCHEDULE 0xfd /* SMP 
reschedule */ -#define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */ - -/* Used for encoding redirected irqs */ - -#define IA64_IRQ_REDIRECTED (1 << 31) - -/* IA64 inter-cpu interrupt related definitions */ - -#define IA64_IPI_DEFAULT_BASE_ADDR 0xfee00000 - -/* Delivery modes for inter-cpu interrupts */ -enum { - IA64_IPI_DM_INT = 0x0, /* pend an external interrupt */ - IA64_IPI_DM_PMI = 0x2, /* pend a PMI */ - IA64_IPI_DM_NMI = 0x4, /* pend an NMI (vector 2) */ - IA64_IPI_DM_INIT = 0x5, /* pend an INIT interrupt */ - IA64_IPI_DM_EXTINT = 0x7, /* pend an 8259-compatible interrupt. */ -}; - -extern __u8 isa_irq_to_vector_map[16]; -#define isa_irq_to_vector(x) isa_irq_to_vector_map[(x)] - -struct irq_cfg { - ia64_vector vector; - cpumask_t domain; - cpumask_t old_domain; - unsigned move_cleanup_count; - u8 move_in_progress : 1; -}; -extern spinlock_t vector_lock; -extern struct irq_cfg irq_cfg[NR_IRQS]; -#define irq_to_domain(x) irq_cfg[(x)].domain -DECLARE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq); - -extern struct irq_chip irq_type_ia64_lsapic; /* CPU-internal interrupt controller */ - -#define ia64_register_ipi ia64_native_register_ipi -#define assign_irq_vector ia64_native_assign_irq_vector -#define free_irq_vector ia64_native_free_irq_vector -#define ia64_resend_irq ia64_native_resend_irq - -extern void ia64_native_register_ipi(void); -extern int bind_irq_vector(int irq, int vector, cpumask_t domain); -extern int ia64_native_assign_irq_vector (int irq); /* allocate a free vector */ -extern void ia64_native_free_irq_vector (int vector); -extern int reserve_irq_vector (int vector); -extern void __setup_vector_irq(int cpu); -extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); -extern void destroy_and_reserve_irq (unsigned int irq); - -#ifdef CONFIG_SMP -extern int irq_prepare_move(int irq, int cpu); -extern void irq_complete_move(unsigned int irq); -#else -static inline int irq_prepare_move(int irq, int cpu) { return 0; 
} -static inline void irq_complete_move(unsigned int irq) {} -#endif - -static inline void ia64_native_resend_irq(unsigned int vector) -{ - ia64_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0); -} - -/* - * Next follows the irq descriptor interface. On IA-64, each CPU supports 256 interrupt - * vectors. On smaller systems, there is a one-to-one correspondence between interrupt - * vectors and the Linux irq numbers. However, larger systems may have multiple interrupt - * domains meaning that the translation from vector number to irq number depends on the - * interrupt domain that a CPU belongs to. This API abstracts such platform-dependent - * differences and provides a uniform means to translate between vector and irq numbers - * and to obtain the irq descriptor for a given irq number. - */ - -/* Extract the IA-64 vector that corresponds to IRQ. */ -static inline ia64_vector -irq_to_vector (int irq) -{ - return irq_cfg[irq].vector; -} - -/* - * Convert the local IA-64 vector to the corresponding irq number. This translation is - * done in the context of the interrupt domain that the currently executing CPU belongs - * to. 
- */ -static inline unsigned int -local_vector_to_irq (ia64_vector vec) -{ - return __this_cpu_read(vector_irq[vec]); -} - -#endif /* _ASM_IA64_HW_IRQ_H */ diff --git a/arch/ia64/include/asm/idle.h b/arch/ia64/include/asm/idle.h deleted file mode 100644 index 97c55b97e0ba..000000000000 --- a/arch/ia64/include/asm/idle.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_IDLE_H -#define _ASM_IA64_IDLE_H - -static inline void enter_idle(void) { } -static inline void exit_idle(void) { } - -#endif /* _ASM_IA64_IDLE_H */ diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h deleted file mode 100644 index 035b17fe12ef..000000000000 --- a/arch/ia64/include/asm/intrinsics.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Compiler-dependent intrinsics. - * - * Copyright (C) 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ -#ifndef _ASM_IA64_INTRINSICS_H -#define _ASM_IA64_INTRINSICS_H - -#include - -#endif /* _ASM_IA64_INTRINSICS_H */ diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h deleted file mode 100644 index eedc0afa8cad..000000000000 --- a/arch/ia64/include/asm/io.h +++ /dev/null @@ -1,271 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_IO_H -#define _ASM_IA64_IO_H - -/* - * This file contains the definitions for the emulated IO instructions - * inb/inw/inl/outb/outw/outl and the "string versions" of the same - * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" - * versions of the single-IO instructions (inb_p/inw_p/..). - * - * This file is not meant to be obfuscating: it's just complicated to - * (a) handle it all in a way that makes gcc able to optimize it as - * well as possible and (b) trying to avoid writing the same thing - * over and over again with slight variations and possibly making a - * mistake somewhere. 
- * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - */ - -#include -#include - -#define __IA64_UNCACHED_OFFSET RGN_BASE(RGN_UNCACHED) - -/* - * The legacy I/O space defined by the ia64 architecture supports only 65536 ports, but - * large machines may have multiple other I/O spaces so we can't place any a priori limit - * on IO_SPACE_LIMIT. These additional spaces are described in ACPI. - */ -#define IO_SPACE_LIMIT 0xffffffffffffffffUL - -#define MAX_IO_SPACES_BITS 8 -#define MAX_IO_SPACES (1UL << MAX_IO_SPACES_BITS) -#define IO_SPACE_BITS 24 -#define IO_SPACE_SIZE (1UL << IO_SPACE_BITS) - -#define IO_SPACE_NR(port) ((port) >> IO_SPACE_BITS) -#define IO_SPACE_BASE(space) ((space) << IO_SPACE_BITS) -#define IO_SPACE_PORT(port) ((port) & (IO_SPACE_SIZE - 1)) - -#define IO_SPACE_SPARSE_ENCODING(p) ((((p) >> 2) << 12) | ((p) & 0xfff)) - -struct io_space { - unsigned long mmio_base; /* base in MMIO space */ - int sparse; -}; - -extern struct io_space io_space[]; -extern unsigned int num_io_spaces; - -# ifdef __KERNEL__ - -/* - * All MMIO iomem cookies are in region 6; anything less is a PIO cookie: - * 0xCxxxxxxxxxxxxxxx MMIO cookie (return from ioremap) - * 0x000000001SPPPPPP PIO cookie (S=space number, P..P=port) - * - * ioread/writeX() uses the leading 1 in PIO cookies (PIO_OFFSET) to catch - * code that uses bare port numbers without the prerequisite pci_iomap(). - */ -#define PIO_OFFSET (1UL << (MAX_IO_SPACES_BITS + IO_SPACE_BITS)) -#define PIO_MASK (PIO_OFFSET - 1) -#define PIO_RESERVED __IA64_UNCACHED_OFFSET -#define HAVE_ARCH_PIO_SIZE - -#include -#include -#include - -/* - * Change virtual addresses to physical addresses and vv. 
- */ -static inline unsigned long -virt_to_phys (volatile void *address) -{ - return (unsigned long) address - PAGE_OFFSET; -} -#define virt_to_phys virt_to_phys - -static inline void* -phys_to_virt (unsigned long address) -{ - return (void *) (address + PAGE_OFFSET); -} -#define phys_to_virt phys_to_virt - -#define ARCH_HAS_VALID_PHYS_ADDR_RANGE -extern u64 kern_mem_attribute (unsigned long phys_addr, unsigned long size); -extern int valid_phys_addr_range (phys_addr_t addr, size_t count); /* efi.c */ -extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count); - -# endif /* KERNEL */ - -/* - * Memory fence w/accept. This should never be used in code that is - * not IA-64 specific. - */ -#define __ia64_mf_a() ia64_mfa() - -static inline void* -__ia64_mk_io_addr (unsigned long port) -{ - struct io_space *space; - unsigned long offset; - - space = &io_space[IO_SPACE_NR(port)]; - port = IO_SPACE_PORT(port); - if (space->sparse) - offset = IO_SPACE_SPARSE_ENCODING(port); - else - offset = port; - - return (void *) (space->mmio_base | offset); -} - -/* - * For the in/out routines, we need to do "mf.a" _after_ doing the I/O access to ensure - * that the access has completed before executing other I/O accesses. Since we're doing - * the accesses through an uncachable (UC) translation, the CPU will execute them in - * program order. However, we still need to tell the compiler not to shuffle them around - * during optimization, which is why we use "volatile" pointers. 
- */ - -#define inb inb -static inline unsigned int inb(unsigned long port) -{ - volatile unsigned char *addr = __ia64_mk_io_addr(port); - unsigned char ret; - - ret = *addr; - __ia64_mf_a(); - return ret; -} - -#define inw inw -static inline unsigned int inw(unsigned long port) -{ - volatile unsigned short *addr = __ia64_mk_io_addr(port); - unsigned short ret; - - ret = *addr; - __ia64_mf_a(); - return ret; -} - -#define inl inl -static inline unsigned int inl(unsigned long port) -{ - volatile unsigned int *addr = __ia64_mk_io_addr(port); - unsigned int ret; - - ret = *addr; - __ia64_mf_a(); - return ret; -} - -#define outb outb -static inline void outb(unsigned char val, unsigned long port) -{ - volatile unsigned char *addr = __ia64_mk_io_addr(port); - - *addr = val; - __ia64_mf_a(); -} - -#define outw outw -static inline void outw(unsigned short val, unsigned long port) -{ - volatile unsigned short *addr = __ia64_mk_io_addr(port); - - *addr = val; - __ia64_mf_a(); -} - -#define outl outl -static inline void outl(unsigned int val, unsigned long port) -{ - volatile unsigned int *addr = __ia64_mk_io_addr(port); - - *addr = val; - __ia64_mf_a(); -} - -#define insb insb -static inline void insb(unsigned long port, void *dst, unsigned long count) -{ - unsigned char *dp = dst; - - while (count--) - *dp++ = inb(port); -} - -#define insw insw -static inline void insw(unsigned long port, void *dst, unsigned long count) -{ - unsigned short *dp = dst; - - while (count--) - put_unaligned(inw(port), dp++); -} - -#define insl insl -static inline void insl(unsigned long port, void *dst, unsigned long count) -{ - unsigned int *dp = dst; - - while (count--) - put_unaligned(inl(port), dp++); -} - -#define outsb outsb -static inline void outsb(unsigned long port, const void *src, - unsigned long count) -{ - const unsigned char *sp = src; - - while (count--) - outb(*sp++, port); -} - -#define outsw outsw -static inline void outsw(unsigned long port, const void *src, - unsigned long 
count) -{ - const unsigned short *sp = src; - - while (count--) - outw(get_unaligned(sp++), port); -} - -#define outsl outsl -static inline void outsl(unsigned long port, const void *src, - unsigned long count) -{ - const unsigned int *sp = src; - - while (count--) - outl(get_unaligned(sp++), port); -} - -# ifdef __KERNEL__ - -#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) - -extern void __iomem * ioremap_uc(unsigned long offset, unsigned long size); - -#define ioremap_prot ioremap_prot -#define ioremap_cache ioremap -#define ioremap_uc ioremap_uc -#define iounmap iounmap - -/* - * String version of IO memory access ops: - */ -extern void memcpy_fromio(void *dst, const volatile void __iomem *src, long n); -extern void memcpy_toio(volatile void __iomem *dst, const void *src, long n); -extern void memset_io(volatile void __iomem *s, int c, long n); - -#define memcpy_fromio memcpy_fromio -#define memcpy_toio memcpy_toio -#define memset_io memset_io -#define xlate_dev_mem_ptr xlate_dev_mem_ptr -#include -#undef PCI_IOBASE - -# endif /* __KERNEL__ */ - -#endif /* _ASM_IA64_IO_H */ diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h deleted file mode 100644 index eb0db20c9d4c..000000000000 --- a/arch/ia64/include/asm/iommu.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_IOMMU_H -#define _ASM_IA64_IOMMU_H 1 - -#include - -/* 10 seconds */ -#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) - -extern void no_iommu_init(void); -#ifdef CONFIG_INTEL_IOMMU -extern int force_iommu, no_iommu; -extern int iommu_detected; - -static inline int __init -arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) { return 0; } -#else -#define no_iommu (1) -#define iommu_detected (0) -#endif - -#endif diff --git a/arch/ia64/include/asm/iosapic.h b/arch/ia64/include/asm/iosapic.h deleted file mode 100644 index a91aeb413e17..000000000000 --- a/arch/ia64/include/asm/iosapic.h +++ /dev/null @@ 
-1,106 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_IA64_IOSAPIC_H -#define __ASM_IA64_IOSAPIC_H - -#define IOSAPIC_REG_SELECT 0x0 -#define IOSAPIC_WINDOW 0x10 -#define IOSAPIC_EOI 0x40 - -#define IOSAPIC_VERSION 0x1 - -/* - * Redirection table entry - */ -#define IOSAPIC_RTE_LOW(i) (0x10+i*2) -#define IOSAPIC_RTE_HIGH(i) (0x11+i*2) - -#define IOSAPIC_DEST_SHIFT 16 - -/* - * Delivery mode - */ -#define IOSAPIC_DELIVERY_SHIFT 8 -#define IOSAPIC_FIXED 0x0 -#define IOSAPIC_LOWEST_PRIORITY 0x1 -#define IOSAPIC_PMI 0x2 -#define IOSAPIC_NMI 0x4 -#define IOSAPIC_INIT 0x5 -#define IOSAPIC_EXTINT 0x7 - -/* - * Interrupt polarity - */ -#define IOSAPIC_POLARITY_SHIFT 13 -#define IOSAPIC_POL_HIGH 0 -#define IOSAPIC_POL_LOW 1 - -/* - * Trigger mode - */ -#define IOSAPIC_TRIGGER_SHIFT 15 -#define IOSAPIC_EDGE 0 -#define IOSAPIC_LEVEL 1 - -/* - * Mask bit - */ - -#define IOSAPIC_MASK_SHIFT 16 -#define IOSAPIC_MASK (1< - * Stephane Eranian - * - * 11/24/98 S.Eranian updated TIMER_IRQ and irq_canonicalize - * 01/20/99 S.Eranian added keyboard interrupt - * 02/29/00 D.Mosberger moved most things into hw_irq.h - */ - -#include -#include -#include - -#define NR_IRQS IA64_NATIVE_NR_IRQS - -static __inline__ int -irq_canonicalize (int irq) -{ - /* - * We do the legacy thing here of pretending that irqs < 16 - * are 8259 irqs. This really shouldn't be necessary at all, - * but we keep it here as serial.c still uses it... - */ - return ((irq == 2) ? 
9 : irq); -} - -extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); - -int create_irq(void); -void destroy_irq(unsigned int irq); - -#endif /* _ASM_IA64_IRQ_H */ diff --git a/arch/ia64/include/asm/irq_regs.h b/arch/ia64/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/ia64/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/ia64/include/asm/irq_remapping.h b/arch/ia64/include/asm/irq_remapping.h deleted file mode 100644 index 547a6e87018c..000000000000 --- a/arch/ia64/include/asm/irq_remapping.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __IA64_INTR_REMAPPING_H -#define __IA64_INTR_REMAPPING_H -#define irq_remapping_enabled 0 -#endif diff --git a/arch/ia64/include/asm/irqflags.h b/arch/ia64/include/asm/irqflags.h deleted file mode 100644 index 1dc30f12e545..000000000000 --- a/arch/ia64/include/asm/irqflags.h +++ /dev/null @@ -1,95 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * IRQ flags defines. 
- * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - */ - -#ifndef _ASM_IA64_IRQFLAGS_H -#define _ASM_IA64_IRQFLAGS_H - -#include -#include - -#ifdef CONFIG_IA64_DEBUG_IRQ -extern unsigned long last_cli_ip; -static inline void arch_maybe_save_ip(unsigned long flags) -{ - if (flags & IA64_PSR_I) - last_cli_ip = ia64_getreg(_IA64_REG_IP); -} -#else -#define arch_maybe_save_ip(flags) do {} while (0) -#endif - -/* - * - clearing psr.i is implicitly serialized (visible by next insn) - * - setting psr.i requires data serialization - * - we need a stop-bit before reading PSR because we sometimes - * write a floating-point register right before reading the PSR - * and that writes to PSR.mfl - */ - -static inline unsigned long arch_local_save_flags(void) -{ - ia64_stop(); - return ia64_getreg(_IA64_REG_PSR); -} - -static inline unsigned long arch_local_irq_save(void) -{ - unsigned long flags = arch_local_save_flags(); - - ia64_stop(); - ia64_rsm(IA64_PSR_I); - arch_maybe_save_ip(flags); - return flags; -} - -static inline void arch_local_irq_disable(void) -{ -#ifdef CONFIG_IA64_DEBUG_IRQ - arch_local_irq_save(); -#else - ia64_stop(); - ia64_rsm(IA64_PSR_I); -#endif -} - -static inline void arch_local_irq_enable(void) -{ - ia64_stop(); - ia64_ssm(IA64_PSR_I); - ia64_srlz_d(); -} - -static inline void arch_local_irq_restore(unsigned long flags) -{ -#ifdef CONFIG_IA64_DEBUG_IRQ - unsigned long old_psr = arch_local_save_flags(); -#endif - ia64_intrin_local_irq_restore(flags & IA64_PSR_I); - arch_maybe_save_ip(old_psr & ~flags); -} - -static inline bool arch_irqs_disabled_flags(unsigned long flags) -{ - return (flags & IA64_PSR_I) == 0; -} - -static inline bool arch_irqs_disabled(void) -{ - return arch_irqs_disabled_flags(arch_local_save_flags()); -} - -static inline void arch_safe_halt(void) -{ - arch_local_irq_enable(); - ia64_pal_halt_light(); /* PAL_HALT_LIGHT */ -} - - -#endif /* 
_ASM_IA64_IRQFLAGS_H */ diff --git a/arch/ia64/include/asm/kdebug.h b/arch/ia64/include/asm/kdebug.h deleted file mode 100644 index 4f7e6dc974bc..000000000000 --- a/arch/ia64/include/asm/kdebug.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _IA64_KDEBUG_H -#define _IA64_KDEBUG_H 1 -/* - * - * Copyright (C) Intel Corporation, 2005 - * - * 2005-Apr Rusty Lynch and Anil S Keshavamurthy - * adopted from - * include/asm-x86_64/kdebug.h - * - * 2005-Oct Keith Owens . Expand notify_die to cover more - * events. - */ - -enum die_val { - DIE_BREAK = 1, - DIE_FAULT, - DIE_OOPS, - DIE_MACHINE_HALT, - DIE_MACHINE_RESTART, - DIE_MCA_MONARCH_ENTER, - DIE_MCA_MONARCH_PROCESS, - DIE_MCA_MONARCH_LEAVE, - DIE_MCA_SLAVE_ENTER, - DIE_MCA_SLAVE_PROCESS, - DIE_MCA_SLAVE_LEAVE, - DIE_MCA_RENDZVOUS_ENTER, - DIE_MCA_RENDZVOUS_PROCESS, - DIE_MCA_RENDZVOUS_LEAVE, - DIE_MCA_NEW_TIMEOUT, - DIE_INIT_ENTER, - DIE_INIT_MONARCH_ENTER, - DIE_INIT_MONARCH_PROCESS, - DIE_INIT_MONARCH_LEAVE, - DIE_INIT_SLAVE_ENTER, - DIE_INIT_SLAVE_PROCESS, - DIE_INIT_SLAVE_LEAVE, - DIE_KDEBUG_ENTER, - DIE_KDEBUG_LEAVE, - DIE_KDUMP_ENTER, - DIE_KDUMP_LEAVE, -}; - -#endif diff --git a/arch/ia64/include/asm/kexec.h b/arch/ia64/include/asm/kexec.h deleted file mode 100644 index 294b1e1ebd2d..000000000000 --- a/arch/ia64/include/asm/kexec.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_KEXEC_H -#define _ASM_IA64_KEXEC_H - -#include - -/* Maximum physical address we can use pages from */ -#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) -/* Maximum address we can reach in physical address mode */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) -/* Maximum address we can use for the control code buffer */ -#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE - -#define KEXEC_CONTROL_PAGE_SIZE (8192 + 8192 + 4096) - -/* The native architecture */ -#define KEXEC_ARCH KEXEC_ARCH_IA_64 - -#define kexec_flush_icache_page(page) do { \ - unsigned long page_addr 
= (unsigned long)page_address(page); \ - flush_icache_range(page_addr, page_addr + PAGE_SIZE); \ - } while(0) - -extern struct kimage *ia64_kimage; -extern const unsigned int relocate_new_kernel_size; -extern void relocate_new_kernel(unsigned long, unsigned long, - struct ia64_boot_param *, unsigned long); -static inline void -crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) -{ -} -extern struct resource efi_memmap_res; -extern struct resource boot_param_res; -extern void kdump_smp_send_stop(void); -extern void kdump_smp_send_init(void); -extern void kexec_disable_iosapic(void); -extern void crash_save_this_cpu(void); -struct rsvd_region; -extern unsigned long kdump_find_rsvd_region(unsigned long size, - struct rsvd_region *rsvd_regions, int n); -extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg); -extern int kdump_status[]; -extern atomic_t kdump_cpu_freezed; -extern atomic_t kdump_in_progress; - -#endif /* _ASM_IA64_KEXEC_H */ diff --git a/arch/ia64/include/asm/kprobes.h b/arch/ia64/include/asm/kprobes.h deleted file mode 100644 index 9e956768946c..000000000000 --- a/arch/ia64/include/asm/kprobes.h +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_KPROBES_H -#define _ASM_KPROBES_H -/* - * Kernel Probes (KProbes) - * - * Copyright (C) IBM Corporation, 2002, 2004 - * Copyright (C) Intel Corporation, 2005 - * - * 2005-Apr Rusty Lynch and Anil S Keshavamurthy - * adapted from i386 - */ -#include -#include - -#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6) - -#ifdef CONFIG_KPROBES - -#include -#include -#include - -#define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE 2 /* last half is for kprobe-booster */ -#define NOP_M_INST (long)(1<<27) -#define BRL_INST(i1, i2) ((long)((0xcL << 37) | /* brl */ \ - (0x1L << 12) | /* many */ \ - (((i1) & 1) << 36) | ((i2) << 13))) /* imm */ - -typedef union cmp_inst { - struct { - unsigned long long qp : 6; - unsigned long long p1 : 6; - 
unsigned long long c : 1; - unsigned long long r2 : 7; - unsigned long long r3 : 7; - unsigned long long p2 : 6; - unsigned long long ta : 1; - unsigned long long x2 : 2; - unsigned long long tb : 1; - unsigned long long opcode : 4; - unsigned long long reserved : 23; - }f; - unsigned long long l; -} cmp_inst_t; - -struct kprobe; - -typedef struct _bundle { - struct { - unsigned long long template : 5; - unsigned long long slot0 : 41; - unsigned long long slot1_p0 : 64-46; - } quad0; - struct { - unsigned long long slot1_p1 : 41 - (64-46); - unsigned long long slot2 : 41; - } quad1; -} __attribute__((__aligned__(16))) bundle_t; - -struct prev_kprobe { - struct kprobe *kp; - unsigned long status; -}; - -#define MAX_PARAM_RSE_SIZE (0x60+0x60/0x3f) -/* per-cpu kprobe control block */ -#define ARCH_PREV_KPROBE_SZ 2 -struct kprobe_ctlblk { - unsigned long kprobe_status; - unsigned long *bsp; - unsigned long cfm; - atomic_t prev_kprobe_index; - struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ]; -}; - -#define kretprobe_blacklist_size 0 - -#define SLOT0_OPCODE_SHIFT (37) -#define SLOT1_p1_OPCODE_SHIFT (37 - (64-46)) -#define SLOT2_OPCODE_SHIFT (37) - -#define INDIRECT_CALL_OPCODE (1) -#define IP_RELATIVE_CALL_OPCODE (5) -#define IP_RELATIVE_BRANCH_OPCODE (4) -#define IP_RELATIVE_PREDICT_OPCODE (7) -#define LONG_BRANCH_OPCODE (0xC) -#define LONG_CALL_OPCODE (0xD) -#define flush_insn_slot(p) do { } while (0) - -typedef struct kprobe_opcode { - bundle_t bundle; -} kprobe_opcode_t; - -/* Architecture specific copy of original instruction*/ -struct arch_specific_insn { - /* copy of the instruction to be emulated */ - kprobe_opcode_t *insn; - #define INST_FLAG_FIX_RELATIVE_IP_ADDR 1 - #define INST_FLAG_FIX_BRANCH_REG 2 - #define INST_FLAG_BREAK_INST 4 - #define INST_FLAG_BOOSTABLE 8 - unsigned long inst_flag; - unsigned short target_br_reg; - unsigned short slot; -}; - -extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int 
kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); - -extern void arch_remove_kprobe(struct kprobe *p); - -#endif /* CONFIG_KPROBES */ -#endif /* _ASM_KPROBES_H */ diff --git a/arch/ia64/include/asm/kregs.h b/arch/ia64/include/asm/kregs.h deleted file mode 100644 index 44113b75e4eb..000000000000 --- a/arch/ia64/include/asm/kregs.h +++ /dev/null @@ -1,166 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_KREGS_H -#define _ASM_IA64_KREGS_H - -/* - * Copyright (C) 2001-2002 Hewlett-Packard Co - * David Mosberger-Tang - */ -/* - * This file defines the kernel register usage convention used by Linux/ia64. - */ - -/* - * Kernel registers: - */ -#define IA64_KR_IO_BASE 0 /* ar.k0: legacy I/O base address */ -#define IA64_KR_TSSD 1 /* ar.k1: IVE uses this as the TSSD */ -#define IA64_KR_PER_CPU_DATA 3 /* ar.k3: physical per-CPU base */ -#define IA64_KR_CURRENT_STACK 4 /* ar.k4: what's mapped in IA64_TR_CURRENT_STACK */ -#define IA64_KR_FPU_OWNER 5 /* ar.k5: fpu-owner (UP only, at the moment) */ -#define IA64_KR_CURRENT 6 /* ar.k6: "current" task pointer */ -#define IA64_KR_PT_BASE 7 /* ar.k7: page table base address (physical) */ - -#define _IA64_KR_PASTE(x,y) x##y -#define _IA64_KR_PREFIX(n) _IA64_KR_PASTE(ar.k, n) -#define IA64_KR(n) _IA64_KR_PREFIX(IA64_KR_##n) - -/* - * Translation registers: - */ -#define IA64_TR_KERNEL 0 /* itr0, dtr0: maps kernel image (code & data) */ -#define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */ -#define IA64_TR_CURRENT_STACK 1 /* dtr1: maps kernel's memory- & register-stacks */ - -#define IA64_TR_ALLOC_BASE 2 /* itr&dtr: Base of dynamic TR resource*/ -#define IA64_TR_ALLOC_MAX 64 /* Max number for dynamic use*/ - -/* Processor status register bits: */ -#define IA64_PSR_BE_BIT 1 -#define IA64_PSR_UP_BIT 2 -#define IA64_PSR_AC_BIT 3 -#define IA64_PSR_MFL_BIT 4 -#define IA64_PSR_MFH_BIT 5 -#define IA64_PSR_IC_BIT 13 -#define IA64_PSR_I_BIT 14 -#define IA64_PSR_PK_BIT 15 
-#define IA64_PSR_DT_BIT 17 -#define IA64_PSR_DFL_BIT 18 -#define IA64_PSR_DFH_BIT 19 -#define IA64_PSR_SP_BIT 20 -#define IA64_PSR_PP_BIT 21 -#define IA64_PSR_DI_BIT 22 -#define IA64_PSR_SI_BIT 23 -#define IA64_PSR_DB_BIT 24 -#define IA64_PSR_LP_BIT 25 -#define IA64_PSR_TB_BIT 26 -#define IA64_PSR_RT_BIT 27 -/* The following are not affected by save_flags()/restore_flags(): */ -#define IA64_PSR_CPL0_BIT 32 -#define IA64_PSR_CPL1_BIT 33 -#define IA64_PSR_IS_BIT 34 -#define IA64_PSR_MC_BIT 35 -#define IA64_PSR_IT_BIT 36 -#define IA64_PSR_ID_BIT 37 -#define IA64_PSR_DA_BIT 38 -#define IA64_PSR_DD_BIT 39 -#define IA64_PSR_SS_BIT 40 -#define IA64_PSR_RI_BIT 41 -#define IA64_PSR_ED_BIT 43 -#define IA64_PSR_BN_BIT 44 -#define IA64_PSR_IA_BIT 45 - -/* A mask of PSR bits that we generally don't want to inherit across a clone2() or an - execve(). Only list flags here that need to be cleared/set for BOTH clone2() and - execve(). */ -#define IA64_PSR_BITS_TO_CLEAR (IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_DB | IA64_PSR_LP | \ - IA64_PSR_TB | IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ - IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA) -#define IA64_PSR_BITS_TO_SET (IA64_PSR_DFH | IA64_PSR_SP) - -#define IA64_PSR_BE (__IA64_UL(1) << IA64_PSR_BE_BIT) -#define IA64_PSR_UP (__IA64_UL(1) << IA64_PSR_UP_BIT) -#define IA64_PSR_AC (__IA64_UL(1) << IA64_PSR_AC_BIT) -#define IA64_PSR_MFL (__IA64_UL(1) << IA64_PSR_MFL_BIT) -#define IA64_PSR_MFH (__IA64_UL(1) << IA64_PSR_MFH_BIT) -#define IA64_PSR_IC (__IA64_UL(1) << IA64_PSR_IC_BIT) -#define IA64_PSR_I (__IA64_UL(1) << IA64_PSR_I_BIT) -#define IA64_PSR_PK (__IA64_UL(1) << IA64_PSR_PK_BIT) -#define IA64_PSR_DT (__IA64_UL(1) << IA64_PSR_DT_BIT) -#define IA64_PSR_DFL (__IA64_UL(1) << IA64_PSR_DFL_BIT) -#define IA64_PSR_DFH (__IA64_UL(1) << IA64_PSR_DFH_BIT) -#define IA64_PSR_SP (__IA64_UL(1) << IA64_PSR_SP_BIT) -#define IA64_PSR_PP (__IA64_UL(1) << IA64_PSR_PP_BIT) -#define IA64_PSR_DI (__IA64_UL(1) << IA64_PSR_DI_BIT) -#define IA64_PSR_SI 
(__IA64_UL(1) << IA64_PSR_SI_BIT) -#define IA64_PSR_DB (__IA64_UL(1) << IA64_PSR_DB_BIT) -#define IA64_PSR_LP (__IA64_UL(1) << IA64_PSR_LP_BIT) -#define IA64_PSR_TB (__IA64_UL(1) << IA64_PSR_TB_BIT) -#define IA64_PSR_RT (__IA64_UL(1) << IA64_PSR_RT_BIT) -/* The following are not affected by save_flags()/restore_flags(): */ -#define IA64_PSR_CPL (__IA64_UL(3) << IA64_PSR_CPL0_BIT) -#define IA64_PSR_IS (__IA64_UL(1) << IA64_PSR_IS_BIT) -#define IA64_PSR_MC (__IA64_UL(1) << IA64_PSR_MC_BIT) -#define IA64_PSR_IT (__IA64_UL(1) << IA64_PSR_IT_BIT) -#define IA64_PSR_ID (__IA64_UL(1) << IA64_PSR_ID_BIT) -#define IA64_PSR_DA (__IA64_UL(1) << IA64_PSR_DA_BIT) -#define IA64_PSR_DD (__IA64_UL(1) << IA64_PSR_DD_BIT) -#define IA64_PSR_SS (__IA64_UL(1) << IA64_PSR_SS_BIT) -#define IA64_PSR_RI (__IA64_UL(3) << IA64_PSR_RI_BIT) -#define IA64_PSR_ED (__IA64_UL(1) << IA64_PSR_ED_BIT) -#define IA64_PSR_BN (__IA64_UL(1) << IA64_PSR_BN_BIT) -#define IA64_PSR_IA (__IA64_UL(1) << IA64_PSR_IA_BIT) - -/* User mask bits: */ -#define IA64_PSR_UM (IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH) - -/* Default Control Register */ -#define IA64_DCR_PP_BIT 0 /* privileged performance monitor default */ -#define IA64_DCR_BE_BIT 1 /* big-endian default */ -#define IA64_DCR_LC_BIT 2 /* ia32 lock-check enable */ -#define IA64_DCR_DM_BIT 8 /* defer TLB miss faults */ -#define IA64_DCR_DP_BIT 9 /* defer page-not-present faults */ -#define IA64_DCR_DK_BIT 10 /* defer key miss faults */ -#define IA64_DCR_DX_BIT 11 /* defer key permission faults */ -#define IA64_DCR_DR_BIT 12 /* defer access right faults */ -#define IA64_DCR_DA_BIT 13 /* defer access bit faults */ -#define IA64_DCR_DD_BIT 14 /* defer debug faults */ - -#define IA64_DCR_PP (__IA64_UL(1) << IA64_DCR_PP_BIT) -#define IA64_DCR_BE (__IA64_UL(1) << IA64_DCR_BE_BIT) -#define IA64_DCR_LC (__IA64_UL(1) << IA64_DCR_LC_BIT) -#define IA64_DCR_DM (__IA64_UL(1) << IA64_DCR_DM_BIT) -#define IA64_DCR_DP (__IA64_UL(1) << 
IA64_DCR_DP_BIT) -#define IA64_DCR_DK (__IA64_UL(1) << IA64_DCR_DK_BIT) -#define IA64_DCR_DX (__IA64_UL(1) << IA64_DCR_DX_BIT) -#define IA64_DCR_DR (__IA64_UL(1) << IA64_DCR_DR_BIT) -#define IA64_DCR_DA (__IA64_UL(1) << IA64_DCR_DA_BIT) -#define IA64_DCR_DD (__IA64_UL(1) << IA64_DCR_DD_BIT) - -/* Interrupt Status Register */ -#define IA64_ISR_X_BIT 32 /* execute access */ -#define IA64_ISR_W_BIT 33 /* write access */ -#define IA64_ISR_R_BIT 34 /* read access */ -#define IA64_ISR_NA_BIT 35 /* non-access */ -#define IA64_ISR_SP_BIT 36 /* speculative load exception */ -#define IA64_ISR_RS_BIT 37 /* mandatory register-stack exception */ -#define IA64_ISR_IR_BIT 38 /* invalid register frame exception */ -#define IA64_ISR_CODE_MASK 0xf - -#define IA64_ISR_X (__IA64_UL(1) << IA64_ISR_X_BIT) -#define IA64_ISR_W (__IA64_UL(1) << IA64_ISR_W_BIT) -#define IA64_ISR_R (__IA64_UL(1) << IA64_ISR_R_BIT) -#define IA64_ISR_NA (__IA64_UL(1) << IA64_ISR_NA_BIT) -#define IA64_ISR_SP (__IA64_UL(1) << IA64_ISR_SP_BIT) -#define IA64_ISR_RS (__IA64_UL(1) << IA64_ISR_RS_BIT) -#define IA64_ISR_IR (__IA64_UL(1) << IA64_ISR_IR_BIT) - -/* ISR code field for non-access instructions */ -#define IA64_ISR_CODE_TPA 0 -#define IA64_ISR_CODE_FC 1 -#define IA64_ISR_CODE_PROBE 2 -#define IA64_ISR_CODE_TAK 3 -#define IA64_ISR_CODE_LFETCH 4 -#define IA64_ISR_CODE_PROBEF 5 - -#endif /* _ASM_IA64_kREGS_H */ diff --git a/arch/ia64/include/asm/libata-portmap.h b/arch/ia64/include/asm/libata-portmap.h deleted file mode 100644 index 757f84e5dc6e..000000000000 --- a/arch/ia64/include/asm/libata-portmap.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_IA64_LIBATA_PORTMAP_H -#define __ASM_IA64_LIBATA_PORTMAP_H - -#define ATA_PRIMARY_IRQ(dev) isa_irq_to_vector(14) - -#define ATA_SECONDARY_IRQ(dev) isa_irq_to_vector(15) - -#endif diff --git a/arch/ia64/include/asm/linkage.h b/arch/ia64/include/asm/linkage.h deleted file mode 100644 index 5178af560925..000000000000 --- 
a/arch/ia64/include/asm/linkage.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_LINKAGE_H -#define __ASM_LINKAGE_H - -#ifndef __ASSEMBLY__ - -#define asmlinkage CPP_ASMLINKAGE __attribute__((syscall_linkage)) - -#else - -#include - -#endif - -#define cond_syscall(x) asm(".weak\t" #x "#\n" #x "#\t=\tsys_ni_syscall#") -#define SYSCALL_ALIAS(alias, name) \ - asm ( #alias "# = " #name "#\n\t.globl " #alias "#") - -#endif diff --git a/arch/ia64/include/asm/local.h b/arch/ia64/include/asm/local.h deleted file mode 100644 index c11c530f74d0..000000000000 --- a/arch/ia64/include/asm/local.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/ia64/include/asm/mca.h b/arch/ia64/include/asm/mca.h deleted file mode 100644 index 05805249296c..000000000000 --- a/arch/ia64/include/asm/mca.h +++ /dev/null @@ -1,185 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * File: mca.h - * Purpose: Machine check handling specific defines - * - * Copyright (C) 1999, 2004 Silicon Graphics, Inc. 
- * Copyright (C) Vijay Chander - * Copyright (C) Srinivasa Thirumalachar - * Copyright (C) Russ Anderson - */ - -#ifndef _ASM_IA64_MCA_H -#define _ASM_IA64_MCA_H - -#if !defined(__ASSEMBLY__) - -#include -#include -#include -#include - -#define IA64_MCA_RENDEZ_TIMEOUT (20 * 1000) /* value in milliseconds - 20 seconds */ - -typedef struct ia64_fptr { - unsigned long fp; - unsigned long gp; -} ia64_fptr_t; - -typedef union cmcv_reg_u { - u64 cmcv_regval; - struct { - u64 cmcr_vector : 8; - u64 cmcr_reserved1 : 4; - u64 cmcr_ignored1 : 1; - u64 cmcr_reserved2 : 3; - u64 cmcr_mask : 1; - u64 cmcr_ignored2 : 47; - } cmcv_reg_s; - -} cmcv_reg_t; - -#define cmcv_mask cmcv_reg_s.cmcr_mask -#define cmcv_vector cmcv_reg_s.cmcr_vector - -enum { - IA64_MCA_RENDEZ_CHECKIN_NOTDONE = 0x0, - IA64_MCA_RENDEZ_CHECKIN_DONE = 0x1, - IA64_MCA_RENDEZ_CHECKIN_INIT = 0x2, - IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA = 0x3, -}; - -/* Information maintained by the MC infrastructure */ -typedef struct ia64_mc_info_s { - u64 imi_mca_handler; - size_t imi_mca_handler_size; - u64 imi_monarch_init_handler; - size_t imi_monarch_init_handler_size; - u64 imi_slave_init_handler; - size_t imi_slave_init_handler_size; - u8 imi_rendez_checkin[NR_CPUS]; - -} ia64_mc_info_t; - -/* Handover state from SAL to OS and vice versa, for both MCA and INIT events. - * Besides the handover state, it also contains some saved registers from the - * time of the event. - * Note: mca_asm.S depends on the precise layout of this structure. 
- */ - -struct ia64_sal_os_state { - - /* SAL to OS */ - unsigned long os_gp; /* GP of the os registered with the SAL, physical */ - unsigned long pal_proc; /* PAL_PROC entry point, physical */ - unsigned long sal_proc; /* SAL_PROC entry point, physical */ - unsigned long rv_rc; /* MCA - Rendezvous state, INIT - reason code */ - unsigned long proc_state_param; /* from R18 */ - unsigned long monarch; /* 1 for a monarch event, 0 for a slave */ - - /* common */ - unsigned long sal_ra; /* Return address in SAL, physical */ - unsigned long sal_gp; /* GP of the SAL - physical */ - struct pal_min_state_area *pal_min_state; /* from R17. physical in asm, virtual in C */ - /* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK). - * Note: if the MCA/INIT recovery code wants to resume to a new context - * then it must change these values to reflect the new kernel stack. - */ - unsigned long prev_IA64_KR_CURRENT; /* previous value of IA64_KR(CURRENT) */ - unsigned long prev_IA64_KR_CURRENT_STACK; - struct task_struct *prev_task; /* previous task, NULL if it is not useful */ - /* Some interrupt registers are not saved in minstate, pt_regs or - * switch_stack. Because MCA/INIT can occur when interrupts are - * disabled, we need to save the additional interrupt registers over - * MCA/INIT and resume. 
- */ - unsigned long isr; - unsigned long ifa; - unsigned long itir; - unsigned long iipa; - unsigned long iim; - unsigned long iha; - - /* OS to SAL */ - unsigned long os_status; /* OS status to SAL, enum below */ - unsigned long context; /* 0 if return to same context - 1 if return to new context */ - - /* I-resources */ - unsigned long iip; - unsigned long ipsr; - unsigned long ifs; -}; - -enum { - IA64_MCA_CORRECTED = 0x0, /* Error has been corrected by OS_MCA */ - IA64_MCA_WARM_BOOT = -1, /* Warm boot of the system need from SAL */ - IA64_MCA_COLD_BOOT = -2, /* Cold boot of the system need from SAL */ - IA64_MCA_HALT = -3 /* System to be halted by SAL */ -}; - -enum { - IA64_INIT_RESUME = 0x0, /* Resume after return from INIT */ - IA64_INIT_WARM_BOOT = -1, /* Warm boot of the system need from SAL */ -}; - -enum { - IA64_MCA_SAME_CONTEXT = 0x0, /* SAL to return to same context */ - IA64_MCA_NEW_CONTEXT = -1 /* SAL to return to new context */ -}; - -/* Per-CPU MCA state that is too big for normal per-CPU variables. */ - -struct ia64_mca_cpu { - u64 mca_stack[KERNEL_STACK_SIZE/8]; - u64 init_stack[KERNEL_STACK_SIZE/8]; -}; - -/* Array of physical addresses of each CPU's MCA area. 
*/ -extern unsigned long __per_cpu_mca[NR_CPUS]; - -extern int cpe_vector; -extern int ia64_cpe_irq; -extern void ia64_mca_init(void); -extern void ia64_mca_irq_init(void); -extern void ia64_mca_cpu_init(void *); -extern void ia64_os_mca_dispatch(void); -extern void ia64_os_mca_dispatch_end(void); -extern void ia64_mca_ucmc_handler(struct pt_regs *, struct ia64_sal_os_state *); -extern void ia64_init_handler(struct pt_regs *, - struct switch_stack *, - struct ia64_sal_os_state *); -extern void ia64_os_init_on_kdump(void); -extern void ia64_monarch_init_handler(void); -extern void ia64_slave_init_handler(void); -extern void ia64_mca_cmc_vector_setup(void); -extern int ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *)); -extern void ia64_unreg_MCA_extension(void); -extern unsigned long ia64_get_rnat(unsigned long *); -extern void ia64_set_psr_mc(void); -extern void ia64_mca_printk(const char * fmt, ...) - __attribute__ ((format (printf, 1, 2))); - -struct ia64_mca_notify_die { - struct ia64_sal_os_state *sos; - int *monarch_cpu; - int *data; -}; - -DECLARE_PER_CPU(u64, ia64_mca_pal_base); - -#else /* __ASSEMBLY__ */ - -#define IA64_MCA_CORRECTED 0x0 /* Error has been corrected by OS_MCA */ -#define IA64_MCA_WARM_BOOT -1 /* Warm boot of the system need from SAL */ -#define IA64_MCA_COLD_BOOT -2 /* Cold boot of the system need from SAL */ -#define IA64_MCA_HALT -3 /* System to be halted by SAL */ - -#define IA64_INIT_RESUME 0x0 /* Resume after return from INIT */ -#define IA64_INIT_WARM_BOOT -1 /* Warm boot of the system need from SAL */ - -#define IA64_MCA_SAME_CONTEXT 0x0 /* SAL to return to same context */ -#define IA64_MCA_NEW_CONTEXT -1 /* SAL to return to new context */ - -#endif /* !__ASSEMBLY__ */ -#endif /* _ASM_IA64_MCA_H */ diff --git a/arch/ia64/include/asm/mca_asm.h b/arch/ia64/include/asm/mca_asm.h deleted file mode 100644 index e3ab1f41f1c3..000000000000 --- a/arch/ia64/include/asm/mca_asm.h +++ /dev/null @@ -1,245 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0 */ -/* - * File: mca_asm.h - * Purpose: Machine check handling specific defines - * - * Copyright (C) 1999 Silicon Graphics, Inc. - * Copyright (C) Vijay Chander - * Copyright (C) Srinivasa Thirumalachar - * Copyright (C) 2000 Hewlett-Packard Co. - * Copyright (C) 2000 David Mosberger-Tang - * Copyright (C) 2002 Intel Corp. - * Copyright (C) 2002 Jenna Hall - * Copyright (C) 2005 Silicon Graphics, Inc - * Copyright (C) 2005 Keith Owens - */ -#ifndef _ASM_IA64_MCA_ASM_H -#define _ASM_IA64_MCA_ASM_H - -#include - -#define PSR_IC 13 -#define PSR_I 14 -#define PSR_DT 17 -#define PSR_RT 27 -#define PSR_MC 35 -#define PSR_IT 36 -#define PSR_BN 44 - -/* - * This macro converts a instruction virtual address to a physical address - * Right now for simulation purposes the virtual addresses are - * direct mapped to physical addresses. - * 1. Lop off bits 61 thru 63 in the virtual address - */ -#define INST_VA_TO_PA(addr) \ - dep addr = 0, addr, 61, 3 -/* - * This macro converts a data virtual address to a physical address - * Right now for simulation purposes the virtual addresses are - * direct mapped to physical addresses. - * 1. Lop off bits 61 thru 63 in the virtual address - */ -#define DATA_VA_TO_PA(addr) \ - tpa addr = addr -/* - * This macro converts a data physical address to a virtual address - * Right now for simulation purposes the virtual addresses are - * direct mapped to physical addresses. - * 1. Put 0x7 in bits 61 thru 63. - */ -#define DATA_PA_TO_VA(addr,temp) \ - mov temp = 0x7 ;; \ - dep addr = temp, addr, 61, 3 - -#define GET_THIS_PADDR(reg, var) \ - mov reg = IA64_KR(PER_CPU_DATA);; \ - addl reg = THIS_CPU(var), reg - -/* - * This macro jumps to the instruction at the given virtual address - * and starts execution in physical mode with all the address - * translations turned off. - * 1. Save the current psr - * 2. Make sure that all the upper 32 bits are off - * - * 3. 
Clear the interrupt enable and interrupt state collection bits - * in the psr before updating the ipsr and iip. - * - * 4. Turn off the instruction, data and rse translation bits of the psr - * and store the new value into ipsr - * Also make sure that the interrupts are disabled. - * Ensure that we are in little endian mode. - * [psr.{rt, it, dt, i, be} = 0] - * - * 5. Get the physical address corresponding to the virtual address - * of the next instruction bundle and put it in iip. - * (Using magic numbers 24 and 40 in the deposint instruction since - * the IA64_SDK code directly maps to lower 24bits as physical address - * from a virtual address). - * - * 6. Do an rfi to move the values from ipsr to psr and iip to ip. - */ -#define PHYSICAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \ - mov old_psr = psr; \ - ;; \ - dep old_psr = 0, old_psr, 32, 32; \ - \ - mov ar.rsc = 0 ; \ - ;; \ - srlz.d; \ - mov temp2 = ar.bspstore; \ - ;; \ - DATA_VA_TO_PA(temp2); \ - ;; \ - mov temp1 = ar.rnat; \ - ;; \ - mov ar.bspstore = temp2; \ - ;; \ - mov ar.rnat = temp1; \ - mov temp1 = psr; \ - mov temp2 = psr; \ - ;; \ - \ - dep temp2 = 0, temp2, PSR_IC, 2; \ - ;; \ - mov psr.l = temp2; \ - ;; \ - srlz.d; \ - dep temp1 = 0, temp1, 32, 32; \ - ;; \ - dep temp1 = 0, temp1, PSR_IT, 1; \ - ;; \ - dep temp1 = 0, temp1, PSR_DT, 1; \ - ;; \ - dep temp1 = 0, temp1, PSR_RT, 1; \ - ;; \ - dep temp1 = 0, temp1, PSR_I, 1; \ - ;; \ - dep temp1 = 0, temp1, PSR_IC, 1; \ - ;; \ - dep temp1 = -1, temp1, PSR_MC, 1; \ - ;; \ - mov cr.ipsr = temp1; \ - ;; \ - LOAD_PHYSICAL(p0, temp2, start_addr); \ - ;; \ - mov cr.iip = temp2; \ - mov cr.ifs = r0; \ - DATA_VA_TO_PA(sp); \ - DATA_VA_TO_PA(gp); \ - ;; \ - srlz.i; \ - ;; \ - nop 1; \ - nop 2; \ - nop 1; \ - nop 2; \ - rfi; \ - ;; - -/* - * This macro jumps to the instruction at the given virtual address - * and starts execution in virtual mode with all the address - * translations turned on. - * 1. Get the old saved psr - * - * 2. 
Clear the interrupt state collection bit in the current psr. - * - * 3. Set the instruction translation bit back in the old psr - * Note we have to do this since we are right now saving only the - * lower 32-bits of old psr.(Also the old psr has the data and - * rse translation bits on) - * - * 4. Set ipsr to this old_psr with "it" bit set and "bn" = 1. - * - * 5. Reset the current thread pointer (r13). - * - * 6. Set iip to the virtual address of the next instruction bundle. - * - * 7. Do an rfi to move ipsr to psr and iip to ip. - */ - -#define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \ - mov temp2 = psr; \ - ;; \ - mov old_psr = temp2; \ - ;; \ - dep temp2 = 0, temp2, PSR_IC, 2; \ - ;; \ - mov psr.l = temp2; \ - mov ar.rsc = 0; \ - ;; \ - srlz.d; \ - mov r13 = ar.k6; \ - mov temp2 = ar.bspstore; \ - ;; \ - DATA_PA_TO_VA(temp2,temp1); \ - ;; \ - mov temp1 = ar.rnat; \ - ;; \ - mov ar.bspstore = temp2; \ - ;; \ - mov ar.rnat = temp1; \ - ;; \ - mov temp1 = old_psr; \ - ;; \ - mov temp2 = 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_IC, 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_IT, 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_DT, 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_RT, 1; \ - ;; \ - dep temp1 = temp2, temp1, PSR_BN, 1; \ - ;; \ - \ - mov cr.ipsr = temp1; \ - movl temp2 = start_addr; \ - ;; \ - mov cr.iip = temp2; \ - movl gp = __gp \ - ;; \ - DATA_PA_TO_VA(sp, temp1); \ - srlz.i; \ - ;; \ - nop 1; \ - nop 2; \ - nop 1; \ - rfi \ - ;; - -/* - * The MCA and INIT stacks in struct ia64_mca_cpu look like normal kernel - * stacks, except that the SAL/OS state and a switch_stack are stored near the - * top of the MCA/INIT stack. To support concurrent entry to MCA or INIT, as - * well as MCA over INIT, each event needs its own SAL/OS state. All entries - * are 16 byte aligned. 
- * - * +---------------------------+ - * | pt_regs | - * +---------------------------+ - * | switch_stack | - * +---------------------------+ - * | SAL/OS state | - * +---------------------------+ - * | 16 byte scratch area | - * +---------------------------+ <-------- SP at start of C MCA handler - * | ..... | - * +---------------------------+ - * | RBS for MCA/INIT handler | - * +---------------------------+ - * | struct task for MCA/INIT | - * +---------------------------+ <-------- Bottom of MCA/INIT stack - */ - -#define ALIGN16(x) ((x)&~15) -#define MCA_PT_REGS_OFFSET ALIGN16(KERNEL_STACK_SIZE-IA64_PT_REGS_SIZE) -#define MCA_SWITCH_STACK_OFFSET ALIGN16(MCA_PT_REGS_OFFSET-IA64_SWITCH_STACK_SIZE) -#define MCA_SOS_OFFSET ALIGN16(MCA_SWITCH_STACK_OFFSET-IA64_SAL_OS_STATE_SIZE) -#define MCA_SP_OFFSET ALIGN16(MCA_SOS_OFFSET-16) - -#endif /* _ASM_IA64_MCA_ASM_H */ diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h deleted file mode 100644 index f1d5bf2ba847..000000000000 --- a/arch/ia64/include/asm/meminit.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef meminit_h -#define meminit_h - -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- */ - - -/* - * Entries defined so far: - * - boot param structure itself - * - memory map - * - initrd (optional) - * - command line string - * - kernel code & data - * - crash dumping code reserved region - * - Kernel memory map built from EFI memory map - * - ELF core header - * - * More could be added if necessary - */ -#define IA64_MAX_RSVD_REGIONS 9 - -struct rsvd_region { - u64 start; /* virtual address of beginning of element */ - u64 end; /* virtual address of end of element + 1 */ -}; - -extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; - -extern void find_memory (void); -extern void reserve_memory (void); -extern void find_initrd (void); -extern int filter_rsvd_memory (u64 start, u64 end, void *arg); -extern int filter_memory (u64 start, u64 end, void *arg); -extern unsigned long efi_memmap_init(u64 *s, u64 *e); -extern int find_max_min_low_pfn (u64, u64, void *); - -extern unsigned long vmcore_find_descriptor_size(unsigned long address); - -/* - * For rounding an address to the next IA64_GRANULE_SIZE or order - */ -#define GRANULEROUNDDOWN(n) ((n) & ~(IA64_GRANULE_SIZE-1)) -#define GRANULEROUNDUP(n) (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1)) - -#ifdef CONFIG_NUMA - extern void call_pernode_memory (unsigned long start, unsigned long len, void *func); -#else -# define call_pernode_memory(start, len, func) (*func)(start, len, 0) -#endif - -#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */ - -extern int register_active_ranges(u64 start, u64 len, int nid); - -#endif /* meminit_h */ diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h deleted file mode 100644 index 15cf100add0e..000000000000 --- a/arch/ia64/include/asm/mman.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Based on . 
- * - * Modified 1998-2000, 2002 - * David Mosberger-Tang , Hewlett-Packard Co - */ -#ifndef _ASM_IA64_MMAN_H -#define _ASM_IA64_MMAN_H - -#include - -#ifndef __ASSEMBLY__ -#define arch_mmap_check ia64_mmap_check -int ia64_mmap_check(unsigned long addr, unsigned long len, - unsigned long flags); -#endif -#endif /* _ASM_IA64_MMAN_H */ diff --git a/arch/ia64/include/asm/mmiowb.h b/arch/ia64/include/asm/mmiowb.h deleted file mode 100644 index d67aab4ea3b4..000000000000 --- a/arch/ia64/include/asm/mmiowb.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _ASM_IA64_MMIOWB_H -#define _ASM_IA64_MMIOWB_H - -/** - * mmiowb - I/O write barrier - * - * Ensure ordering of I/O space writes. This will make sure that writes - * following the barrier will arrive after all previous writes. For most - * ia64 platforms, this is a simple 'mf.a' instruction. - */ -#define mmiowb() ia64_mfa() - -#include - -#endif /* _ASM_IA64_MMIOWB_H */ diff --git a/arch/ia64/include/asm/mmu.h b/arch/ia64/include/asm/mmu.h deleted file mode 100644 index f75f44f531c2..000000000000 --- a/arch/ia64/include/asm/mmu.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __MMU_H -#define __MMU_H - -/* - * Type for a context number. We declare it volatile to ensure proper - * ordering when it's accessed outside of spinlock'd critical sections - * (e.g., as done in activate_mm() and init_new_context()). 
- */ -typedef volatile unsigned long mm_context_t; - -typedef unsigned long nv_mm_context_t; - -#endif diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h deleted file mode 100644 index 06257e355d00..000000000000 --- a/arch/ia64/include/asm/mmu_context.h +++ /dev/null @@ -1,194 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_MMU_CONTEXT_H -#define _ASM_IA64_MMU_CONTEXT_H - -/* - * Copyright (C) 1998-2002 Hewlett-Packard Co - * David Mosberger-Tang - */ - -/* - * Routines to manage the allocation of task context numbers. Task context - * numbers are used to reduce or eliminate the need to perform TLB flushes - * due to context switches. Context numbers are implemented using ia-64 - * region ids. Since the IA-64 TLB does not consider the region number when - * performing a TLB lookup, we need to assign a unique region id to each - * region in a process. We use the least significant three bits in aregion - * id for this purpose. - */ - -#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */ - -#define ia64_rid(ctx,addr) (((ctx) << 3) | (addr >> 61)) - -# include -# ifndef __ASSEMBLY__ - -#include -#include -#include -#include -#include - -#include -#include - -struct ia64_ctx { - spinlock_t lock; - unsigned int next; /* next context number to use */ - unsigned int limit; /* available free range */ - unsigned int max_ctx; /* max. context value supported by all CPUs */ - /* call wrap_mmu_context when next >= max */ - unsigned long *bitmap; /* bitmap size is max_ctx+1 */ - unsigned long *flushmap;/* pending rid to be flushed */ -}; - -extern struct ia64_ctx ia64_ctx; -DECLARE_PER_CPU(u8, ia64_need_tlb_flush); - -extern void mmu_context_init (void); -extern void wrap_mmu_context (struct mm_struct *mm); - -/* - * When the context counter wraps around all TLBs need to be flushed because - * an old context number might have been reused. 
This is signalled by the - * ia64_need_tlb_flush per-CPU variable, which is checked in the routine - * below. Called by activate_mm(). - */ -static inline void -delayed_tlb_flush (void) -{ - extern void local_flush_tlb_all (void); - unsigned long flags; - - if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) { - spin_lock_irqsave(&ia64_ctx.lock, flags); - if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { - local_flush_tlb_all(); - __ia64_per_cpu_var(ia64_need_tlb_flush) = 0; - } - spin_unlock_irqrestore(&ia64_ctx.lock, flags); - } -} - -static inline nv_mm_context_t -get_mmu_context (struct mm_struct *mm) -{ - unsigned long flags; - nv_mm_context_t context = mm->context; - - if (likely(context)) - goto out; - - spin_lock_irqsave(&ia64_ctx.lock, flags); - /* re-check, now that we've got the lock: */ - context = mm->context; - if (context == 0) { - cpumask_clear(mm_cpumask(mm)); - if (ia64_ctx.next >= ia64_ctx.limit) { - ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, - ia64_ctx.max_ctx, ia64_ctx.next); - ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, - ia64_ctx.max_ctx, ia64_ctx.next); - if (ia64_ctx.next >= ia64_ctx.max_ctx) - wrap_mmu_context(mm); - } - mm->context = context = ia64_ctx.next++; - __set_bit(context, ia64_ctx.bitmap); - } - spin_unlock_irqrestore(&ia64_ctx.lock, flags); -out: - /* - * Ensure we're not starting to use "context" before any old - * uses of it are gone from our TLB. - */ - delayed_tlb_flush(); - - return context; -} - -/* - * Initialize context number to some sane value. MM is guaranteed to be a - * brand-new address-space, so no TLB flushing is needed, ever. 
- */ -#define init_new_context init_new_context -static inline int -init_new_context (struct task_struct *p, struct mm_struct *mm) -{ - mm->context = 0; - return 0; -} - -static inline void -reload_context (nv_mm_context_t context) -{ - unsigned long rid; - unsigned long rid_incr = 0; - unsigned long rr0, rr1, rr2, rr3, rr4; - -#ifdef CONFIG_HUGETLB_PAGE - unsigned long old_rr4; - old_rr4 = ia64_get_rr(RGN_BASE(RGN_HPAGE)); -#endif - rid = context << 3; /* make space for encoding the region number */ - rid_incr = 1 << 8; - - /* encode the region id, preferred page size, and VHPT enable bit: */ - rr0 = (rid << 8) | (PAGE_SHIFT << 2) | 1; - rr1 = rr0 + 1*rid_incr; - rr2 = rr0 + 2*rid_incr; - rr3 = rr0 + 3*rid_incr; - rr4 = rr0 + 4*rid_incr; -#ifdef CONFIG_HUGETLB_PAGE - rr4 = (rr4 & (~(0xfcUL))) | (old_rr4 & 0xfc); - -# if RGN_HPAGE != 4 -# error "reload_context assumes RGN_HPAGE is 4" -# endif -#endif - - ia64_set_rr0_to_rr4(rr0, rr1, rr2, rr3, rr4); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -/* - * Must be called with preemption off - */ -static inline void -activate_context (struct mm_struct *mm) -{ - nv_mm_context_t context; - - do { - context = get_mmu_context(mm); - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - reload_context(context); - /* - * in the unlikely event of a TLB-flush by another thread, - * redo the load. - */ - } while (unlikely(context != mm->context)); -} - -/* - * Switch from address space PREV to address space NEXT. - */ -#define activate_mm activate_mm -static inline void -activate_mm (struct mm_struct *prev, struct mm_struct *next) -{ - /* - * We may get interrupts here, but that's OK because interrupt - * handlers cannot touch user-space. - */ - ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd)); - activate_context(next); -} - -#define switch_mm(prev_mm,next_mm,next_task) activate_mm(prev_mm, next_mm) - -#include - -# endif /* ! 
__ASSEMBLY__ */ -#endif /* _ASM_IA64_MMU_CONTEXT_H */ diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h deleted file mode 100644 index 767201f66c93..000000000000 --- a/arch/ia64/include/asm/mmzone.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2000,2003 Silicon Graphics, Inc. All rights reserved. - * Copyright (c) 2002 NEC Corp. - * Copyright (c) 2002 Erich Focht - * Copyright (c) 2002 Kimio Suganuma - */ -#ifndef _ASM_IA64_MMZONE_H -#define _ASM_IA64_MMZONE_H - -#include -#include -#include - -#ifdef CONFIG_NUMA - -static inline int pfn_to_nid(unsigned long pfn) -{ - extern int paddr_to_nid(unsigned long); - int nid = paddr_to_nid(pfn << PAGE_SHIFT); - if (nid < 0) - return 0; - else - return nid; -} - -#define MAX_PHYSNODE_ID 2048 -#endif /* CONFIG_NUMA */ - -#define NR_NODE_MEMBLKS (MAX_NUMNODES * 4) - -#endif /* _ASM_IA64_MMZONE_H */ diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h deleted file mode 100644 index 7271b9c5fc76..000000000000 --- a/arch/ia64/include/asm/module.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_MODULE_H -#define _ASM_IA64_MODULE_H - -#include - -/* - * IA-64-specific support for kernel module loader. - * - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -struct elf64_shdr; /* forward declration */ - -struct mod_arch_specific { - /* Used only at module load time. 
*/ - struct elf64_shdr *core_plt; /* core PLT section */ - struct elf64_shdr *init_plt; /* init PLT section */ - struct elf64_shdr *got; /* global offset table */ - struct elf64_shdr *opd; /* official procedure descriptors */ - struct elf64_shdr *unwind; /* unwind-table section */ - unsigned long gp; /* global-pointer for module */ - unsigned int next_got_entry; /* index of next available got entry */ - - /* Used at module run and cleanup time. */ - void *core_unw_table; /* core unwind-table cookie returned by unwinder */ - void *init_unw_table; /* init unwind-table cookie returned by unwinder */ - void *opd_addr; /* symbolize uses .opd to get to actual function */ - unsigned long opd_size; -}; - -#define ARCH_SHF_SMALL SHF_IA_64_SHORT - -#endif /* _ASM_IA64_MODULE_H */ diff --git a/arch/ia64/include/asm/module.lds.h b/arch/ia64/include/asm/module.lds.h deleted file mode 100644 index eff68f362793..000000000000 --- a/arch/ia64/include/asm/module.lds.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -SECTIONS { - /* Group unwind sections into a single section: */ - .IA_64.unwind_info : { *(.IA_64.unwind_info*) } - .IA_64.unwind : { *(.IA_64.unwind*) } - /* - * Create place-holder sections to hold the PLTs, GOT, and - * official procedure-descriptors (.opd). 
- */ - .core.plt : { BYTE(0) } - .init.plt : { BYTE(0) } - .got : { BYTE(0) } - .opd : { BYTE(0) } -} diff --git a/arch/ia64/include/asm/msidef.h b/arch/ia64/include/asm/msidef.h deleted file mode 100644 index 18d0e4226748..000000000000 --- a/arch/ia64/include/asm/msidef.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _IA64_MSI_DEF_H -#define _IA64_MSI_DEF_H - -/* - * Shifts for APIC-based data - */ - -#define MSI_DATA_VECTOR_SHIFT 0 -#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) -#define MSI_DATA_VECTOR_MASK 0xffffff00 - -#define MSI_DATA_DELIVERY_MODE_SHIFT 8 -#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) -#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) - -#define MSI_DATA_LEVEL_SHIFT 14 -#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) -#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) - -#define MSI_DATA_TRIGGER_SHIFT 15 -#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) -#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) - -/* - * Shift/mask fields for APIC-based bus address - */ - -#define MSI_ADDR_DEST_ID_SHIFT 4 -#define MSI_ADDR_HEADER 0xfee00000 - -#define MSI_ADDR_DEST_ID_MASK 0xfff0000f -#define MSI_ADDR_DEST_ID_CPU(cpu) ((cpu) << MSI_ADDR_DEST_ID_SHIFT) - -#define MSI_ADDR_DEST_MODE_SHIFT 2 -#define MSI_ADDR_DEST_MODE_PHYS (0 << MSI_ADDR_DEST_MODE_SHIFT) -#define MSI_ADDR_DEST_MODE_LOGIC (1 << MSI_ADDR_DEST_MODE_SHIFT) - -#define MSI_ADDR_REDIRECTION_SHIFT 3 -#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) -#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) - -#endif/* _IA64_MSI_DEF_H */ diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h deleted file mode 100644 index e08662396029..000000000000 --- a/arch/ia64/include/asm/native/inst.h +++ /dev/null @@ -1,119 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ 
-/****************************************************************************** - * arch/ia64/include/asm/native/inst.h - * - * Copyright (c) 2008 Isaku Yamahata - * VA Linux Systems Japan K.K. - */ - -#define DO_SAVE_MIN IA64_NATIVE_DO_SAVE_MIN - -#define MOV_FROM_IFA(reg) \ - mov reg = cr.ifa - -#define MOV_FROM_ITIR(reg) \ - mov reg = cr.itir - -#define MOV_FROM_ISR(reg) \ - mov reg = cr.isr - -#define MOV_FROM_IHA(reg) \ - mov reg = cr.iha - -#define MOV_FROM_IPSR(pred, reg) \ -(pred) mov reg = cr.ipsr - -#define MOV_FROM_IIM(reg) \ - mov reg = cr.iim - -#define MOV_FROM_IIP(reg) \ - mov reg = cr.iip - -#define MOV_FROM_IVR(reg, clob) \ - mov reg = cr.ivr - -#define MOV_FROM_PSR(pred, reg, clob) \ -(pred) mov reg = psr - -#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \ -(pred) mov reg = ar.itc - -#define MOV_TO_IFA(reg, clob) \ - mov cr.ifa = reg - -#define MOV_TO_ITIR(pred, reg, clob) \ -(pred) mov cr.itir = reg - -#define MOV_TO_IHA(pred, reg, clob) \ -(pred) mov cr.iha = reg - -#define MOV_TO_IPSR(pred, reg, clob) \ -(pred) mov cr.ipsr = reg - -#define MOV_TO_IFS(pred, reg, clob) \ -(pred) mov cr.ifs = reg - -#define MOV_TO_IIP(reg, clob) \ - mov cr.iip = reg - -#define MOV_TO_KR(kr, reg, clob0, clob1) \ - mov IA64_KR(kr) = reg - -#define ITC_I(pred, reg, clob) \ -(pred) itc.i reg - -#define ITC_D(pred, reg, clob) \ -(pred) itc.d reg - -#define ITC_I_AND_D(pred_i, pred_d, reg, clob) \ -(pred_i) itc.i reg; \ -(pred_d) itc.d reg - -#define THASH(pred, reg0, reg1, clob) \ -(pred) thash reg0 = reg1 - -#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1) \ - ssm psr.ic | PSR_DEFAULT_BITS \ - ;; \ - srlz.i /* guarantee that interruption collectin is on */ \ - ;; - -#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1) \ - ssm psr.ic \ - ;; \ - srlz.d - -#define RSM_PSR_IC(clob) \ - rsm psr.ic - -#define SSM_PSR_I(pred, pred_clob, clob) \ -(pred) ssm psr.i - -#define RSM_PSR_I(pred, clob0, clob1) \ -(pred) rsm psr.i - -#define RSM_PSR_I_IC(clob0, clob1, clob2) \ 
- rsm psr.i | psr.ic - -#define RSM_PSR_DT \ - rsm psr.dt - -#define RSM_PSR_BE_I(clob0, clob1) \ - rsm psr.be | psr.i - -#define SSM_PSR_DT_AND_SRLZ_I \ - ssm psr.dt \ - ;; \ - srlz.i - -#define BSW_0(clob0, clob1, clob2) \ - bsw.0 - -#define BSW_1(clob0, clob1) \ - bsw.1 - -#define COVER \ - cover - -#define RFI \ - rfi diff --git a/arch/ia64/include/asm/native/irq.h b/arch/ia64/include/asm/native/irq.h deleted file mode 100644 index aa74915f8aa2..000000000000 --- a/arch/ia64/include/asm/native/irq.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/****************************************************************************** - * arch/ia64/include/asm/native/irq.h - * - * Copyright (c) 2008 Isaku Yamahata - * VA Linux Systems Japan K.K. - */ - -#ifndef _ASM_IA64_NATIVE_IRQ_H -#define _ASM_IA64_NATIVE_IRQ_H - -#define NR_VECTORS 256 - -#if (NR_VECTORS + 32 * NR_CPUS) < 1024 -#define IA64_NATIVE_NR_IRQS (NR_VECTORS + 32 * NR_CPUS) -#else -#define IA64_NATIVE_NR_IRQS 1024 -#endif - -#endif /* _ASM_IA64_NATIVE_IRQ_H */ diff --git a/arch/ia64/include/asm/native/patchlist.h b/arch/ia64/include/asm/native/patchlist.h deleted file mode 100644 index f13e7675758c..000000000000 --- a/arch/ia64/include/asm/native/patchlist.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/****************************************************************************** - * arch/ia64/include/asm/native/inst.h - * - * Copyright (c) 2008 Isaku Yamahata - * VA Linux Systems Japan K.K. 
- */ - -#define __paravirt_start_gate_fsyscall_patchlist \ - __ia64_native_start_gate_fsyscall_patchlist -#define __paravirt_end_gate_fsyscall_patchlist \ - __ia64_native_end_gate_fsyscall_patchlist -#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist \ - __ia64_native_start_gate_brl_fsys_bubble_down_patchlist -#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist \ - __ia64_native_end_gate_brl_fsys_bubble_down_patchlist -#define __paravirt_start_gate_vtop_patchlist \ - __ia64_native_start_gate_vtop_patchlist -#define __paravirt_end_gate_vtop_patchlist \ - __ia64_native_end_gate_vtop_patchlist -#define __paravirt_start_gate_mckinley_e9_patchlist \ - __ia64_native_start_gate_mckinley_e9_patchlist -#define __paravirt_end_gate_mckinley_e9_patchlist \ - __ia64_native_end_gate_mckinley_e9_patchlist diff --git a/arch/ia64/include/asm/nodedata.h b/arch/ia64/include/asm/nodedata.h deleted file mode 100644 index 2fb337b0e9b7..000000000000 --- a/arch/ia64/include/asm/nodedata.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2000 Silicon Graphics, Inc. All rights reserved. - * Copyright (c) 2002 NEC Corp. - * Copyright (c) 2002 Erich Focht - * Copyright (c) 2002 Kimio Suganuma - */ -#ifndef _ASM_IA64_NODEDATA_H -#define _ASM_IA64_NODEDATA_H - -#include - -#include -#include - -#ifdef CONFIG_NUMA - -/* - * Node Data. One of these structures is located on each node of a NUMA system. - */ - -struct pglist_data; -struct ia64_node_data { - short active_cpu_count; - short node; - struct pglist_data *pg_data_ptrs[MAX_NUMNODES]; -}; - - -/* - * Return a pointer to the node_data structure for the executing cpu. - */ -#define local_node_data (local_cpu_data->node_data) - -/* - * Given a node id, return a pointer to the pg_data_t for the node. 
- * - * NODE_DATA - should be used in all code not related to system - * initialization. It uses pernode data structures to minimize - * offnode memory references. However, these structure are not - * present during boot. This macro can be used once cpu_init - * completes. - */ -#define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid]) - -/* - * LOCAL_DATA_ADDR - This is to calculate the address of other node's - * "local_node_data" at hot-plug phase. The local_node_data - * is pointed by per_cpu_page. Kernel usually use it for - * just executing cpu. However, when new node is hot-added, - * the addresses of local data for other nodes are necessary - * to update all of them. - */ -#define LOCAL_DATA_ADDR(pgdat) \ - ((struct ia64_node_data *)((u64)(pgdat) + \ - L1_CACHE_ALIGN(sizeof(struct pglist_data)))) - -#endif /* CONFIG_NUMA */ - -#endif /* _ASM_IA64_NODEDATA_H */ diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h deleted file mode 100644 index c5c253cb9bd6..000000000000 --- a/arch/ia64/include/asm/numa.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * This file contains NUMA specific prototypes and definitions. - * - * 2002/08/05 Erich Focht - * - */ -#ifndef _ASM_IA64_NUMA_H -#define _ASM_IA64_NUMA_H - - -#ifdef CONFIG_NUMA - -#include -#include -#include -#include -#include - -#include - -extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; -extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; -extern pg_data_t *pgdat_list[MAX_NUMNODES]; - -/* Stuff below this line could be architecture independent */ - -extern int num_node_memblks; /* total number of memory chunks */ - -/* - * List of node memory chunks. Filled when parsing SRAT table to - * obtain information about memory nodes. 
-*/ - -struct node_memblk_s { - unsigned long start_paddr; - unsigned long size; - int nid; /* which logical node contains this chunk? */ - int bank; /* which mem bank on this node */ -}; - -struct node_cpuid_s { - u16 phys_id; /* id << 8 | eid */ - int nid; /* logical node containing this CPU */ -}; - -extern struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; -extern struct node_cpuid_s node_cpuid[NR_CPUS]; - -/* - * ACPI 2.0 SLIT (System Locality Information Table) - * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf - * - * This is a matrix with "distances" between nodes, they should be - * proportional to the memory access latency ratios. - */ - -extern u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; -#define slit_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)]) -extern int __node_distance(int from, int to); -#define node_distance(from,to) __node_distance(from, to) - -extern int paddr_to_nid(unsigned long paddr); - -#define local_nodeid (cpu_to_node_map[smp_processor_id()]) - -#define numa_off 0 - -extern void map_cpu_to_node(int cpu, int nid); -extern void unmap_cpu_from_node(int cpu, int nid); -extern void numa_clear_node(int cpu); - -#else /* !CONFIG_NUMA */ -#define map_cpu_to_node(cpu, nid) do{}while(0) -#define unmap_cpu_from_node(cpu, nid) do{}while(0) -#define paddr_to_nid(addr) 0 -#define numa_clear_node(cpu) do { } while (0) -#endif /* CONFIG_NUMA */ - -#endif /* _ASM_IA64_NUMA_H */ diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h deleted file mode 100644 index 310b09c3342d..000000000000 --- a/arch/ia64/include/asm/page.h +++ /dev/null @@ -1,208 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_PAGE_H -#define _ASM_IA64_PAGE_H -/* - * Pagetable related stuff. - * - * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include -#include - -/* - * The top three bits of an IA64 address are its Region Number. 
- * Different regions are assigned to different purposes. - */ -#define RGN_SHIFT (61) -#define RGN_BASE(r) (__IA64_UL_CONST(r)<> PAGE_SHIFT) - -#include - -#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) - -typedef union ia64_va { - struct { - unsigned long off : 61; /* intra-region offset */ - unsigned long reg : 3; /* region number */ - } f; - unsigned long l; - void *p; -} ia64_va; - -/* - * Note: These macros depend on the fact that PAGE_OFFSET has all - * region bits set to 1 and all other bits set to zero. They are - * expressed in this way to ensure they result in a single "dep" - * instruction. - */ -#define __pa(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;}) -#define __va(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;}) - -#define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;}) -#define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;}) - -#ifdef CONFIG_HUGETLB_PAGE -# define htlbpage_to_page(x) (((unsigned long) REGION_NUMBER(x) << 61) \ - | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT))) -# define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -extern unsigned int hpage_shift; -#endif - -static __inline__ int -get_order (unsigned long size) -{ - long double d = size - 1; - long order; - - order = ia64_getf_exp(d); - order = order - PAGE_SHIFT - 0xffff + 1; - if (order < 0) - order = 0; - return order; -} - -#endif /* !__ASSEMBLY__ */ - -#ifdef STRICT_MM_TYPECHECKS - /* - * These are used to make use of C type-checking.. 
- */ - typedef struct { unsigned long pte; } pte_t; - typedef struct { unsigned long pmd; } pmd_t; -#if CONFIG_PGTABLE_LEVELS == 4 - typedef struct { unsigned long pud; } pud_t; -#endif - typedef struct { unsigned long pgd; } pgd_t; - typedef struct { unsigned long pgprot; } pgprot_t; - typedef struct page *pgtable_t; - -# define pte_val(x) ((x).pte) -# define pmd_val(x) ((x).pmd) -#if CONFIG_PGTABLE_LEVELS == 4 -# define pud_val(x) ((x).pud) -#endif -# define pgd_val(x) ((x).pgd) -# define pgprot_val(x) ((x).pgprot) - -# define __pte(x) ((pte_t) { (x) } ) -# define __pmd(x) ((pmd_t) { (x) } ) -# define __pgprot(x) ((pgprot_t) { (x) } ) - -#else /* !STRICT_MM_TYPECHECKS */ - /* - * .. while these make it easier on the compiler - */ -# ifndef __ASSEMBLY__ - typedef unsigned long pte_t; - typedef unsigned long pmd_t; - typedef unsigned long pgd_t; - typedef unsigned long pgprot_t; - typedef struct page *pgtable_t; -# endif - -# define pte_val(x) (x) -# define pmd_val(x) (x) -# define pgd_val(x) (x) -# define pgprot_val(x) (x) - -# define __pte(x) (x) -# define __pgd(x) (x) -# define __pgprot(x) (x) -#endif /* !STRICT_MM_TYPECHECKS */ - -#define PAGE_OFFSET RGN_BASE(RGN_KERNEL) - -#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC - -#define GATE_ADDR RGN_BASE(RGN_GATE) - -/* - * 0xa000000000000000+2*PERCPU_PAGE_SIZE - * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page) - */ -#define KERNEL_START (GATE_ADDR+__IA64_UL_CONST(0x100000000)) -#define PERCPU_ADDR (-PERCPU_PAGE_SIZE) -#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) - -#define __HAVE_ARCH_GATE_AREA 1 - -#endif /* _ASM_IA64_PAGE_H */ diff --git a/arch/ia64/include/asm/pal.h b/arch/ia64/include/asm/pal.h deleted file mode 100644 index e6b652f9e45e..000000000000 --- a/arch/ia64/include/asm/pal.h +++ /dev/null @@ -1,1827 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_PAL_H -#define _ASM_IA64_PAL_H - -/* - * Processor Abstraction Layer definitions. 
- * - * This is based on Intel IA-64 Architecture Software Developer's Manual rev 1.0 - * chapter 11 IA-64 Processor Abstraction Layer - * - * Copyright (C) 1998-2001 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999 Srinivasa Prasad Thirumalachar - * Copyright (C) 2008 Silicon Graphics, Inc. (SGI) - * - * 99/10/01 davidm Make sure we pass zero for reserved parameters. - * 00/03/07 davidm Updated pal_cache_flush() to be in sync with PAL v2.6. - * 00/03/23 cfleck Modified processor min-state save area to match updated PAL & SAL info - * 00/05/24 eranian Updated to latest PAL spec, fix structures bugs, added - * 00/05/25 eranian Support for stack calls, and static physical calls - * 00/06/18 eranian Support for stacked physical calls - * 06/10/26 rja Support for Intel Itanium Architecture Software Developer's - * Manual Rev 2.2 (Jan 2006) - */ - -/* - * Note that some of these calls use a static-register only calling - * convention which has nothing to do with the regular calling - * convention. 
- */ -#define PAL_CACHE_FLUSH 1 /* flush i/d cache */ -#define PAL_CACHE_INFO 2 /* get detailed i/d cache info */ -#define PAL_CACHE_INIT 3 /* initialize i/d cache */ -#define PAL_CACHE_SUMMARY 4 /* get summary of cache hierarchy */ -#define PAL_MEM_ATTRIB 5 /* list supported memory attributes */ -#define PAL_PTCE_INFO 6 /* purge TLB info */ -#define PAL_VM_INFO 7 /* return supported virtual memory features */ -#define PAL_VM_SUMMARY 8 /* return summary on supported vm features */ -#define PAL_BUS_GET_FEATURES 9 /* return processor bus interface features settings */ -#define PAL_BUS_SET_FEATURES 10 /* set processor bus features */ -#define PAL_DEBUG_INFO 11 /* get number of debug registers */ -#define PAL_FIXED_ADDR 12 /* get fixed component of processors's directed address */ -#define PAL_FREQ_BASE 13 /* base frequency of the platform */ -#define PAL_FREQ_RATIOS 14 /* ratio of processor, bus and ITC frequency */ -#define PAL_PERF_MON_INFO 15 /* return performance monitor info */ -#define PAL_PLATFORM_ADDR 16 /* set processor interrupt block and IO port space addr */ -#define PAL_PROC_GET_FEATURES 17 /* get configurable processor features & settings */ -#define PAL_PROC_SET_FEATURES 18 /* enable/disable configurable processor features */ -#define PAL_RSE_INFO 19 /* return rse information */ -#define PAL_VERSION 20 /* return version of PAL code */ -#define PAL_MC_CLEAR_LOG 21 /* clear all processor log info */ -#define PAL_MC_DRAIN 22 /* drain operations which could result in an MCA */ -#define PAL_MC_EXPECTED 23 /* set/reset expected MCA indicator */ -#define PAL_MC_DYNAMIC_STATE 24 /* get processor dynamic state */ -#define PAL_MC_ERROR_INFO 25 /* get processor MCA info and static state */ -#define PAL_MC_RESUME 26 /* Return to interrupted process */ -#define PAL_MC_REGISTER_MEM 27 /* Register memory for PAL to use during MCAs and inits */ -#define PAL_HALT 28 /* enter the low power HALT state */ -#define PAL_HALT_LIGHT 29 /* enter the low power light halt state*/ 
-#define PAL_COPY_INFO 30 /* returns info needed to relocate PAL */ -#define PAL_CACHE_LINE_INIT 31 /* init tags & data of cache line */ -#define PAL_PMI_ENTRYPOINT 32 /* register PMI memory entry points with the processor */ -#define PAL_ENTER_IA_32_ENV 33 /* enter IA-32 system environment */ -#define PAL_VM_PAGE_SIZE 34 /* return vm TC and page walker page sizes */ - -#define PAL_MEM_FOR_TEST 37 /* get amount of memory needed for late processor test */ -#define PAL_CACHE_PROT_INFO 38 /* get i/d cache protection info */ -#define PAL_REGISTER_INFO 39 /* return AR and CR register information*/ -#define PAL_SHUTDOWN 40 /* enter processor shutdown state */ -#define PAL_PREFETCH_VISIBILITY 41 /* Make Processor Prefetches Visible */ -#define PAL_LOGICAL_TO_PHYSICAL 42 /* returns information on logical to physical processor mapping */ -#define PAL_CACHE_SHARED_INFO 43 /* returns information on caches shared by logical processor */ -#define PAL_GET_HW_POLICY 48 /* Get current hardware resource sharing policy */ -#define PAL_SET_HW_POLICY 49 /* Set current hardware resource sharing policy */ -#define PAL_VP_INFO 50 /* Information about virtual processor features */ -#define PAL_MC_HW_TRACKING 51 /* Hardware tracking status */ - -#define PAL_COPY_PAL 256 /* relocate PAL procedures and PAL PMI */ -#define PAL_HALT_INFO 257 /* return the low power capabilities of processor */ -#define PAL_TEST_PROC 258 /* perform late processor self-test */ -#define PAL_CACHE_READ 259 /* read tag & data of cacheline for diagnostic testing */ -#define PAL_CACHE_WRITE 260 /* write tag & data of cacheline for diagnostic testing */ -#define PAL_VM_TR_READ 261 /* read contents of translation register */ -#define PAL_GET_PSTATE 262 /* get the current P-state */ -#define PAL_SET_PSTATE 263 /* set the P-state */ -#define PAL_BRAND_INFO 274 /* Processor branding information */ - -#define PAL_GET_PSTATE_TYPE_LASTSET 0 -#define PAL_GET_PSTATE_TYPE_AVGANDRESET 1 -#define PAL_GET_PSTATE_TYPE_AVGNORESET 2 
-#define PAL_GET_PSTATE_TYPE_INSTANT 3 - -#define PAL_MC_ERROR_INJECT 276 /* Injects processor error or returns injection capabilities */ - -#ifndef __ASSEMBLY__ - -#include -#include -#include - -/* - * Data types needed to pass information into PAL procedures and - * interpret information returned by them. - */ - -/* Return status from the PAL procedure */ -typedef s64 pal_status_t; - -#define PAL_STATUS_SUCCESS 0 /* No error */ -#define PAL_STATUS_UNIMPLEMENTED (-1) /* Unimplemented procedure */ -#define PAL_STATUS_EINVAL (-2) /* Invalid argument */ -#define PAL_STATUS_ERROR (-3) /* Error */ -#define PAL_STATUS_CACHE_INIT_FAIL (-4) /* Could not initialize the - * specified level and type of - * cache without sideeffects - * and "restrict" was 1 - */ -#define PAL_STATUS_REQUIRES_MEMORY (-9) /* Call requires PAL memory buffer */ - -/* Processor cache level in the hierarchy */ -typedef u64 pal_cache_level_t; -#define PAL_CACHE_LEVEL_L0 0 /* L0 */ -#define PAL_CACHE_LEVEL_L1 1 /* L1 */ -#define PAL_CACHE_LEVEL_L2 2 /* L2 */ - - -/* Processor cache type at a particular level in the hierarchy */ - -typedef u64 pal_cache_type_t; -#define PAL_CACHE_TYPE_INSTRUCTION 1 /* Instruction cache */ -#define PAL_CACHE_TYPE_DATA 2 /* Data or unified cache */ -#define PAL_CACHE_TYPE_INSTRUCTION_DATA 3 /* Both Data & Instruction */ - - -#define PAL_CACHE_FLUSH_INVALIDATE 1 /* Invalidate clean lines */ -#define PAL_CACHE_FLUSH_CHK_INTRS 2 /* check for interrupts/mc while flushing */ - -/* Processor cache line size in bytes */ -typedef int pal_cache_line_size_t; - -/* Processor cache line state */ -typedef u64 pal_cache_line_state_t; -#define PAL_CACHE_LINE_STATE_INVALID 0 /* Invalid */ -#define PAL_CACHE_LINE_STATE_SHARED 1 /* Shared */ -#define PAL_CACHE_LINE_STATE_EXCLUSIVE 2 /* Exclusive */ -#define PAL_CACHE_LINE_STATE_MODIFIED 3 /* Modified */ - -typedef struct pal_freq_ratio { - u32 den, num; /* numerator & denominator */ -} itc_ratio, proc_ratio; - -typedef union 
pal_cache_config_info_1_s { - struct { - u64 u : 1, /* 0 Unified cache ? */ - at : 2, /* 2-1 Cache mem attr*/ - reserved : 5, /* 7-3 Reserved */ - associativity : 8, /* 16-8 Associativity*/ - line_size : 8, /* 23-17 Line size */ - stride : 8, /* 31-24 Stride */ - store_latency : 8, /*39-32 Store latency*/ - load_latency : 8, /* 47-40 Load latency*/ - store_hints : 8, /* 55-48 Store hints*/ - load_hints : 8; /* 63-56 Load hints */ - } pcci1_bits; - u64 pcci1_data; -} pal_cache_config_info_1_t; - -typedef union pal_cache_config_info_2_s { - struct { - u32 cache_size; /*cache size in bytes*/ - - - u32 alias_boundary : 8, /* 39-32 aliased addr - * separation for max - * performance. - */ - tag_ls_bit : 8, /* 47-40 LSb of addr*/ - tag_ms_bit : 8, /* 55-48 MSb of addr*/ - reserved : 8; /* 63-56 Reserved */ - } pcci2_bits; - u64 pcci2_data; -} pal_cache_config_info_2_t; - - -typedef struct pal_cache_config_info_s { - pal_status_t pcci_status; - pal_cache_config_info_1_t pcci_info_1; - pal_cache_config_info_2_t pcci_info_2; - u64 pcci_reserved; -} pal_cache_config_info_t; - -#define pcci_ld_hints pcci_info_1.pcci1_bits.load_hints -#define pcci_st_hints pcci_info_1.pcci1_bits.store_hints -#define pcci_ld_latency pcci_info_1.pcci1_bits.load_latency -#define pcci_st_latency pcci_info_1.pcci1_bits.store_latency -#define pcci_stride pcci_info_1.pcci1_bits.stride -#define pcci_line_size pcci_info_1.pcci1_bits.line_size -#define pcci_assoc pcci_info_1.pcci1_bits.associativity -#define pcci_cache_attr pcci_info_1.pcci1_bits.at -#define pcci_unified pcci_info_1.pcci1_bits.u -#define pcci_tag_msb pcci_info_2.pcci2_bits.tag_ms_bit -#define pcci_tag_lsb pcci_info_2.pcci2_bits.tag_ls_bit -#define pcci_alias_boundary pcci_info_2.pcci2_bits.alias_boundary -#define pcci_cache_size pcci_info_2.pcci2_bits.cache_size - - - -/* Possible values for cache attributes */ - -#define PAL_CACHE_ATTR_WT 0 /* Write through cache */ -#define PAL_CACHE_ATTR_WB 1 /* Write back cache */ -#define 
PAL_CACHE_ATTR_WT_OR_WB 2 /* Either write thru or write - * back depending on TLB - * memory attributes - */ - - -/* Possible values for cache hints */ - -#define PAL_CACHE_HINT_TEMP_1 0 /* Temporal level 1 */ -#define PAL_CACHE_HINT_NTEMP_1 1 /* Non-temporal level 1 */ -#define PAL_CACHE_HINT_NTEMP_ALL 3 /* Non-temporal all levels */ - -/* Processor cache protection information */ -typedef union pal_cache_protection_element_u { - u32 pcpi_data; - struct { - u32 data_bits : 8, /* # data bits covered by - * each unit of protection - */ - - tagprot_lsb : 6, /* Least -do- */ - tagprot_msb : 6, /* Most Sig. tag address - * bit that this - * protection covers. - */ - prot_bits : 6, /* # of protection bits */ - method : 4, /* Protection method */ - t_d : 2; /* Indicates which part - * of the cache this - * protection encoding - * applies. - */ - } pcp_info; -} pal_cache_protection_element_t; - -#define pcpi_cache_prot_part pcp_info.t_d -#define pcpi_prot_method pcp_info.method -#define pcpi_prot_bits pcp_info.prot_bits -#define pcpi_tagprot_msb pcp_info.tagprot_msb -#define pcpi_tagprot_lsb pcp_info.tagprot_lsb -#define pcpi_data_bits pcp_info.data_bits - -/* Processor cache part encodings */ -#define PAL_CACHE_PROT_PART_DATA 0 /* Data protection */ -#define PAL_CACHE_PROT_PART_TAG 1 /* Tag protection */ -#define PAL_CACHE_PROT_PART_TAG_DATA 2 /* Tag+data protection (tag is - * more significant ) - */ -#define PAL_CACHE_PROT_PART_DATA_TAG 3 /* Data+tag protection (data is - * more significant ) - */ -#define PAL_CACHE_PROT_PART_MAX 6 - - -typedef struct pal_cache_protection_info_s { - pal_status_t pcpi_status; - pal_cache_protection_element_t pcp_info[PAL_CACHE_PROT_PART_MAX]; -} pal_cache_protection_info_t; - - -/* Processor cache protection method encodings */ -#define PAL_CACHE_PROT_METHOD_NONE 0 /* No protection */ -#define PAL_CACHE_PROT_METHOD_ODD_PARITY 1 /* Odd parity */ -#define PAL_CACHE_PROT_METHOD_EVEN_PARITY 2 /* Even parity */ -#define 
PAL_CACHE_PROT_METHOD_ECC 3 /* ECC protection */ - - -/* Processor cache line identification in the hierarchy */ -typedef union pal_cache_line_id_u { - u64 pclid_data; - struct { - u64 cache_type : 8, /* 7-0 cache type */ - level : 8, /* 15-8 level of the - * cache in the - * hierarchy. - */ - way : 8, /* 23-16 way in the set - */ - part : 8, /* 31-24 part of the - * cache - */ - reserved : 32; /* 63-32 is reserved*/ - } pclid_info_read; - struct { - u64 cache_type : 8, /* 7-0 cache type */ - level : 8, /* 15-8 level of the - * cache in the - * hierarchy. - */ - way : 8, /* 23-16 way in the set - */ - part : 8, /* 31-24 part of the - * cache - */ - mesi : 8, /* 39-32 cache line - * state - */ - start : 8, /* 47-40 lsb of data to - * invert - */ - length : 8, /* 55-48 #bits to - * invert - */ - trigger : 8; /* 63-56 Trigger error - * by doing a load - * after the write - */ - - } pclid_info_write; -} pal_cache_line_id_u_t; - -#define pclid_read_part pclid_info_read.part -#define pclid_read_way pclid_info_read.way -#define pclid_read_level pclid_info_read.level -#define pclid_read_cache_type pclid_info_read.cache_type - -#define pclid_write_trigger pclid_info_write.trigger -#define pclid_write_length pclid_info_write.length -#define pclid_write_start pclid_info_write.start -#define pclid_write_mesi pclid_info_write.mesi -#define pclid_write_part pclid_info_write.part -#define pclid_write_way pclid_info_write.way -#define pclid_write_level pclid_info_write.level -#define pclid_write_cache_type pclid_info_write.cache_type - -/* Processor cache line part encodings */ -#define PAL_CACHE_LINE_ID_PART_DATA 0 /* Data */ -#define PAL_CACHE_LINE_ID_PART_TAG 1 /* Tag */ -#define PAL_CACHE_LINE_ID_PART_DATA_PROT 2 /* Data protection */ -#define PAL_CACHE_LINE_ID_PART_TAG_PROT 3 /* Tag protection */ -#define PAL_CACHE_LINE_ID_PART_DATA_TAG_PROT 4 /* Data+tag - * protection - */ -typedef struct pal_cache_line_info_s { - pal_status_t pcli_status; /* Return status of the read cache 
line - * info call. - */ - u64 pcli_data; /* 64-bit data, tag, protection bits .. */ - u64 pcli_data_len; /* data length in bits */ - pal_cache_line_state_t pcli_cache_line_state; /* mesi state */ - -} pal_cache_line_info_t; - - -/* Machine Check related crap */ - -/* Pending event status bits */ -typedef u64 pal_mc_pending_events_t; - -#define PAL_MC_PENDING_MCA (1 << 0) -#define PAL_MC_PENDING_INIT (1 << 1) - -/* Error information type */ -typedef u64 pal_mc_info_index_t; - -#define PAL_MC_INFO_PROCESSOR 0 /* Processor */ -#define PAL_MC_INFO_CACHE_CHECK 1 /* Cache check */ -#define PAL_MC_INFO_TLB_CHECK 2 /* Tlb check */ -#define PAL_MC_INFO_BUS_CHECK 3 /* Bus check */ -#define PAL_MC_INFO_REQ_ADDR 4 /* Requestor address */ -#define PAL_MC_INFO_RESP_ADDR 5 /* Responder address */ -#define PAL_MC_INFO_TARGET_ADDR 6 /* Target address */ -#define PAL_MC_INFO_IMPL_DEP 7 /* Implementation - * dependent - */ - -#define PAL_TLB_CHECK_OP_PURGE 8 - -typedef struct pal_process_state_info_s { - u64 reserved1 : 2, - rz : 1, /* PAL_CHECK processor - * rendezvous - * successful. - */ - - ra : 1, /* PAL_CHECK attempted - * a rendezvous. - */ - me : 1, /* Distinct multiple - * errors occurred - */ - - mn : 1, /* Min. state save - * area has been - * registered with PAL - */ - - sy : 1, /* Storage integrity - * synched - */ - - - co : 1, /* Continuable */ - ci : 1, /* MC isolated */ - us : 1, /* Uncontained storage - * damage. - */ - - - hd : 1, /* Non-essential hw - * lost (no loss of - * functionality) - * causing the - * processor to run in - * degraded mode. - */ - - tl : 1, /* 1 => MC occurred - * after an instr was - * executed but before - * the trap that - * resulted from instr - * execution was - * generated. 
- * (Trap Lost ) - */ - mi : 1, /* More information available - * call PAL_MC_ERROR_INFO - */ - pi : 1, /* Precise instruction pointer */ - pm : 1, /* Precise min-state save area */ - - dy : 1, /* Processor dynamic - * state valid - */ - - - in : 1, /* 0 = MC, 1 = INIT */ - rs : 1, /* RSE valid */ - cm : 1, /* MC corrected */ - ex : 1, /* MC is expected */ - cr : 1, /* Control regs valid*/ - pc : 1, /* Perf cntrs valid */ - dr : 1, /* Debug regs valid */ - tr : 1, /* Translation regs - * valid - */ - rr : 1, /* Region regs valid */ - ar : 1, /* App regs valid */ - br : 1, /* Branch regs valid */ - pr : 1, /* Predicate registers - * valid - */ - - fp : 1, /* fp registers valid*/ - b1 : 1, /* Preserved bank one - * general registers - * are valid - */ - b0 : 1, /* Preserved bank zero - * general registers - * are valid - */ - gr : 1, /* General registers - * are valid - * (excl. banked regs) - */ - dsize : 16, /* size of dynamic - * state returned - * by the processor - */ - - se : 1, /* Shared error. MCA in a - shared structure */ - reserved2 : 10, - cc : 1, /* Cache check */ - tc : 1, /* TLB check */ - bc : 1, /* Bus check */ - rc : 1, /* Register file check */ - uc : 1; /* Uarch check */ - -} pal_processor_state_info_t; - -typedef struct pal_cache_check_info_s { - u64 op : 4, /* Type of cache - * operation that - * caused the machine - * check. 
- */ - level : 2, /* Cache level */ - reserved1 : 2, - dl : 1, /* Failure in data part - * of cache line - */ - tl : 1, /* Failure in tag part - * of cache line - */ - dc : 1, /* Failure in dcache */ - ic : 1, /* Failure in icache */ - mesi : 3, /* Cache line state */ - mv : 1, /* mesi valid */ - way : 5, /* Way in which the - * error occurred - */ - wiv : 1, /* Way field valid */ - reserved2 : 1, - dp : 1, /* Data poisoned on MBE */ - reserved3 : 6, - hlth : 2, /* Health indicator */ - - index : 20, /* Cache line index */ - reserved4 : 2, - - is : 1, /* instruction set (1 == ia32) */ - iv : 1, /* instruction set field valid */ - pl : 2, /* privilege level */ - pv : 1, /* privilege level field valid */ - mcc : 1, /* Machine check corrected */ - tv : 1, /* Target address - * structure is valid - */ - rq : 1, /* Requester identifier - * structure is valid - */ - rp : 1, /* Responder identifier - * structure is valid - */ - pi : 1; /* Precise instruction pointer - * structure is valid - */ -} pal_cache_check_info_t; - -typedef struct pal_tlb_check_info_s { - - u64 tr_slot : 8, /* Slot# of TR where - * error occurred - */ - trv : 1, /* tr_slot field is valid */ - reserved1 : 1, - level : 2, /* TLB level where failure occurred */ - reserved2 : 4, - dtr : 1, /* Fail in data TR */ - itr : 1, /* Fail in inst TR */ - dtc : 1, /* Fail in data TC */ - itc : 1, /* Fail in inst. 
TC */ - op : 4, /* Cache operation */ - reserved3 : 6, - hlth : 2, /* Health indicator */ - reserved4 : 22, - - is : 1, /* instruction set (1 == ia32) */ - iv : 1, /* instruction set field valid */ - pl : 2, /* privilege level */ - pv : 1, /* privilege level field valid */ - mcc : 1, /* Machine check corrected */ - tv : 1, /* Target address - * structure is valid - */ - rq : 1, /* Requester identifier - * structure is valid - */ - rp : 1, /* Responder identifier - * structure is valid - */ - pi : 1; /* Precise instruction pointer - * structure is valid - */ -} pal_tlb_check_info_t; - -typedef struct pal_bus_check_info_s { - u64 size : 5, /* Xaction size */ - ib : 1, /* Internal bus error */ - eb : 1, /* External bus error */ - cc : 1, /* Error occurred - * during cache-cache - * transfer. - */ - type : 8, /* Bus xaction type*/ - sev : 5, /* Bus error severity*/ - hier : 2, /* Bus hierarchy level */ - dp : 1, /* Data poisoned on MBE */ - bsi : 8, /* Bus error status - * info - */ - reserved2 : 22, - - is : 1, /* instruction set (1 == ia32) */ - iv : 1, /* instruction set field valid */ - pl : 2, /* privilege level */ - pv : 1, /* privilege level field valid */ - mcc : 1, /* Machine check corrected */ - tv : 1, /* Target address - * structure is valid - */ - rq : 1, /* Requester identifier - * structure is valid - */ - rp : 1, /* Responder identifier - * structure is valid - */ - pi : 1; /* Precise instruction pointer - * structure is valid - */ -} pal_bus_check_info_t; - -typedef struct pal_reg_file_check_info_s { - u64 id : 4, /* Register file identifier */ - op : 4, /* Type of register - * operation that - * caused the machine - * check. 
- */ - reg_num : 7, /* Register number */ - rnv : 1, /* reg_num valid */ - reserved2 : 38, - - is : 1, /* instruction set (1 == ia32) */ - iv : 1, /* instruction set field valid */ - pl : 2, /* privilege level */ - pv : 1, /* privilege level field valid */ - mcc : 1, /* Machine check corrected */ - reserved3 : 3, - pi : 1; /* Precise instruction pointer - * structure is valid - */ -} pal_reg_file_check_info_t; - -typedef struct pal_uarch_check_info_s { - u64 sid : 5, /* Structure identification */ - level : 3, /* Level of failure */ - array_id : 4, /* Array identification */ - op : 4, /* Type of - * operation that - * caused the machine - * check. - */ - way : 6, /* Way of structure */ - wv : 1, /* way valid */ - xv : 1, /* index valid */ - reserved1 : 6, - hlth : 2, /* Health indicator */ - index : 8, /* Index or set of the uarch - * structure that failed. - */ - reserved2 : 24, - - is : 1, /* instruction set (1 == ia32) */ - iv : 1, /* instruction set field valid */ - pl : 2, /* privilege level */ - pv : 1, /* privilege level field valid */ - mcc : 1, /* Machine check corrected */ - tv : 1, /* Target address - * structure is valid - */ - rq : 1, /* Requester identifier - * structure is valid - */ - rp : 1, /* Responder identifier - * structure is valid - */ - pi : 1; /* Precise instruction pointer - * structure is valid - */ -} pal_uarch_check_info_t; - -typedef union pal_mc_error_info_u { - u64 pmei_data; - pal_processor_state_info_t pme_processor; - pal_cache_check_info_t pme_cache; - pal_tlb_check_info_t pme_tlb; - pal_bus_check_info_t pme_bus; - pal_reg_file_check_info_t pme_reg_file; - pal_uarch_check_info_t pme_uarch; -} pal_mc_error_info_t; - -#define pmci_proc_unknown_check pme_processor.uc -#define pmci_proc_bus_check pme_processor.bc -#define pmci_proc_tlb_check pme_processor.tc -#define pmci_proc_cache_check pme_processor.cc -#define pmci_proc_dynamic_state_size pme_processor.dsize -#define pmci_proc_gpr_valid pme_processor.gr -#define 
pmci_proc_preserved_bank0_gpr_valid pme_processor.b0 -#define pmci_proc_preserved_bank1_gpr_valid pme_processor.b1 -#define pmci_proc_fp_valid pme_processor.fp -#define pmci_proc_predicate_regs_valid pme_processor.pr -#define pmci_proc_branch_regs_valid pme_processor.br -#define pmci_proc_app_regs_valid pme_processor.ar -#define pmci_proc_region_regs_valid pme_processor.rr -#define pmci_proc_translation_regs_valid pme_processor.tr -#define pmci_proc_debug_regs_valid pme_processor.dr -#define pmci_proc_perf_counters_valid pme_processor.pc -#define pmci_proc_control_regs_valid pme_processor.cr -#define pmci_proc_machine_check_expected pme_processor.ex -#define pmci_proc_machine_check_corrected pme_processor.cm -#define pmci_proc_rse_valid pme_processor.rs -#define pmci_proc_machine_check_or_init pme_processor.in -#define pmci_proc_dynamic_state_valid pme_processor.dy -#define pmci_proc_operation pme_processor.op -#define pmci_proc_trap_lost pme_processor.tl -#define pmci_proc_hardware_damage pme_processor.hd -#define pmci_proc_uncontained_storage_damage pme_processor.us -#define pmci_proc_machine_check_isolated pme_processor.ci -#define pmci_proc_continuable pme_processor.co -#define pmci_proc_storage_intergrity_synced pme_processor.sy -#define pmci_proc_min_state_save_area_regd pme_processor.mn -#define pmci_proc_distinct_multiple_errors pme_processor.me -#define pmci_proc_pal_attempted_rendezvous pme_processor.ra -#define pmci_proc_pal_rendezvous_complete pme_processor.rz - - -#define pmci_cache_level pme_cache.level -#define pmci_cache_line_state pme_cache.mesi -#define pmci_cache_line_state_valid pme_cache.mv -#define pmci_cache_line_index pme_cache.index -#define pmci_cache_instr_cache_fail pme_cache.ic -#define pmci_cache_data_cache_fail pme_cache.dc -#define pmci_cache_line_tag_fail pme_cache.tl -#define pmci_cache_line_data_fail pme_cache.dl -#define pmci_cache_operation pme_cache.op -#define pmci_cache_way_valid pme_cache.wv -#define 
pmci_cache_target_address_valid pme_cache.tv -#define pmci_cache_way pme_cache.way -#define pmci_cache_mc pme_cache.mc - -#define pmci_tlb_instr_translation_cache_fail pme_tlb.itc -#define pmci_tlb_data_translation_cache_fail pme_tlb.dtc -#define pmci_tlb_instr_translation_reg_fail pme_tlb.itr -#define pmci_tlb_data_translation_reg_fail pme_tlb.dtr -#define pmci_tlb_translation_reg_slot pme_tlb.tr_slot -#define pmci_tlb_mc pme_tlb.mc - -#define pmci_bus_status_info pme_bus.bsi -#define pmci_bus_req_address_valid pme_bus.rq -#define pmci_bus_resp_address_valid pme_bus.rp -#define pmci_bus_target_address_valid pme_bus.tv -#define pmci_bus_error_severity pme_bus.sev -#define pmci_bus_transaction_type pme_bus.type -#define pmci_bus_cache_cache_transfer pme_bus.cc -#define pmci_bus_transaction_size pme_bus.size -#define pmci_bus_internal_error pme_bus.ib -#define pmci_bus_external_error pme_bus.eb -#define pmci_bus_mc pme_bus.mc - -/* - * NOTE: this min_state_save area struct only includes the 1KB - * architectural state save area. The other 3 KB is scratch space - * for PAL. - */ - -struct pal_min_state_area { - u64 pmsa_nat_bits; /* nat bits for saved GRs */ - u64 pmsa_gr[15]; /* GR1 - GR15 */ - u64 pmsa_bank0_gr[16]; /* GR16 - GR31 */ - u64 pmsa_bank1_gr[16]; /* GR16 - GR31 */ - u64 pmsa_pr; /* predicate registers */ - u64 pmsa_br0; /* branch register 0 */ - u64 pmsa_rsc; /* ar.rsc */ - u64 pmsa_iip; /* cr.iip */ - u64 pmsa_ipsr; /* cr.ipsr */ - u64 pmsa_ifs; /* cr.ifs */ - u64 pmsa_xip; /* previous iip */ - u64 pmsa_xpsr; /* previous psr */ - u64 pmsa_xfs; /* previous ifs */ - u64 pmsa_br1; /* branch register 1 */ - u64 pmsa_reserved[70]; /* pal_min_state_area should total to 1KB */ -}; - - -struct ia64_pal_retval { - /* - * A zero status value indicates call completed without error. - * A negative status value indicates reason of call failure. 
- * A positive status value indicates success but an - * informational value should be printed (e.g., "reboot for - * change to take effect"). - */ - s64 status; - u64 v0; - u64 v1; - u64 v2; -}; - -/* - * Note: Currently unused PAL arguments are generally labeled - * "reserved" so the value specified in the PAL documentation - * (generally 0) MUST be passed. Reserved parameters are not optional - * parameters. - */ -extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64); -extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64); -extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64); -extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64); -extern void ia64_save_scratch_fpregs (struct ia64_fpreg *); -extern void ia64_load_scratch_fpregs (struct ia64_fpreg *); - -#define PAL_CALL(iprv,a0,a1,a2,a3) do { \ - struct ia64_fpreg fr[6]; \ - ia64_save_scratch_fpregs(fr); \ - iprv = ia64_pal_call_static(a0, a1, a2, a3); \ - ia64_load_scratch_fpregs(fr); \ -} while (0) - -#define PAL_CALL_STK(iprv,a0,a1,a2,a3) do { \ - struct ia64_fpreg fr[6]; \ - ia64_save_scratch_fpregs(fr); \ - iprv = ia64_pal_call_stacked(a0, a1, a2, a3); \ - ia64_load_scratch_fpregs(fr); \ -} while (0) - -#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) do { \ - struct ia64_fpreg fr[6]; \ - ia64_save_scratch_fpregs(fr); \ - iprv = ia64_pal_call_phys_static(a0, a1, a2, a3); \ - ia64_load_scratch_fpregs(fr); \ -} while (0) - -#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) do { \ - struct ia64_fpreg fr[6]; \ - ia64_save_scratch_fpregs(fr); \ - iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3); \ - ia64_load_scratch_fpregs(fr); \ -} while (0) - -typedef int (*ia64_pal_handler) (u64, ...); -extern ia64_pal_handler ia64_pal; -extern void ia64_pal_handler_init (void *); - -extern ia64_pal_handler ia64_pal; - -extern pal_cache_config_info_t l0d_cache_config_info; -extern pal_cache_config_info_t l0i_cache_config_info; -extern 
pal_cache_config_info_t l1_cache_config_info; -extern pal_cache_config_info_t l2_cache_config_info; - -extern pal_cache_protection_info_t l0d_cache_protection_info; -extern pal_cache_protection_info_t l0i_cache_protection_info; -extern pal_cache_protection_info_t l1_cache_protection_info; -extern pal_cache_protection_info_t l2_cache_protection_info; - -extern pal_cache_config_info_t pal_cache_config_info_get(pal_cache_level_t, - pal_cache_type_t); - -extern pal_cache_protection_info_t pal_cache_protection_info_get(pal_cache_level_t, - pal_cache_type_t); - - -extern void pal_error(int); - - -/* Useful wrappers for the current list of pal procedures */ - -typedef union pal_bus_features_u { - u64 pal_bus_features_val; - struct { - u64 pbf_reserved1 : 29; - u64 pbf_req_bus_parking : 1; - u64 pbf_bus_lock_mask : 1; - u64 pbf_enable_half_xfer_rate : 1; - u64 pbf_reserved2 : 20; - u64 pbf_enable_shared_line_replace : 1; - u64 pbf_enable_exclusive_line_replace : 1; - u64 pbf_disable_xaction_queueing : 1; - u64 pbf_disable_resp_err_check : 1; - u64 pbf_disable_berr_check : 1; - u64 pbf_disable_bus_req_internal_err_signal : 1; - u64 pbf_disable_bus_req_berr_signal : 1; - u64 pbf_disable_bus_init_event_check : 1; - u64 pbf_disable_bus_init_event_signal : 1; - u64 pbf_disable_bus_addr_err_check : 1; - u64 pbf_disable_bus_addr_err_signal : 1; - u64 pbf_disable_bus_data_err_check : 1; - } pal_bus_features_s; -} pal_bus_features_u_t; - -extern void pal_bus_features_print (u64); - -/* Provide information about configurable processor bus features */ -static inline s64 -ia64_pal_bus_get_features (pal_bus_features_u_t *features_avail, - pal_bus_features_u_t *features_status, - pal_bus_features_u_t *features_control) -{ - struct ia64_pal_retval iprv; - PAL_CALL_PHYS(iprv, PAL_BUS_GET_FEATURES, 0, 0, 0); - if (features_avail) - features_avail->pal_bus_features_val = iprv.v0; - if (features_status) - features_status->pal_bus_features_val = iprv.v1; - if (features_control) - 
features_control->pal_bus_features_val = iprv.v2; - return iprv.status; -} - -/* Enables/disables specific processor bus features */ -static inline s64 -ia64_pal_bus_set_features (pal_bus_features_u_t feature_select) -{ - struct ia64_pal_retval iprv; - PAL_CALL_PHYS(iprv, PAL_BUS_SET_FEATURES, feature_select.pal_bus_features_val, 0, 0); - return iprv.status; -} - -/* Get detailed cache information */ -static inline s64 -ia64_pal_cache_config_info (u64 cache_level, u64 cache_type, pal_cache_config_info_t *conf) -{ - struct ia64_pal_retval iprv; - - PAL_CALL(iprv, PAL_CACHE_INFO, cache_level, cache_type, 0); - - if (iprv.status == 0) { - conf->pcci_status = iprv.status; - conf->pcci_info_1.pcci1_data = iprv.v0; - conf->pcci_info_2.pcci2_data = iprv.v1; - conf->pcci_reserved = iprv.v2; - } - return iprv.status; - -} - -/* Get detailed cche protection information */ -static inline s64 -ia64_pal_cache_prot_info (u64 cache_level, u64 cache_type, pal_cache_protection_info_t *prot) -{ - struct ia64_pal_retval iprv; - - PAL_CALL(iprv, PAL_CACHE_PROT_INFO, cache_level, cache_type, 0); - - if (iprv.status == 0) { - prot->pcpi_status = iprv.status; - prot->pcp_info[0].pcpi_data = iprv.v0 & 0xffffffff; - prot->pcp_info[1].pcpi_data = iprv.v0 >> 32; - prot->pcp_info[2].pcpi_data = iprv.v1 & 0xffffffff; - prot->pcp_info[3].pcpi_data = iprv.v1 >> 32; - prot->pcp_info[4].pcpi_data = iprv.v2 & 0xffffffff; - prot->pcp_info[5].pcpi_data = iprv.v2 >> 32; - } - return iprv.status; -} - -/* - * Flush the processor instruction or data caches. *PROGRESS must be - * initialized to zero before calling this for the first time.. 
- */ -static inline s64 -ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress); - if (vector) - *vector = iprv.v0; - *progress = iprv.v1; - return iprv.status; -} - - -/* Initialize the processor controlled caches */ -static inline s64 -ia64_pal_cache_init (u64 level, u64 cache_type, u64 rest) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_CACHE_INIT, level, cache_type, rest); - return iprv.status; -} - -/* Initialize the tags and data of a data or unified cache line of - * processor controlled cache to known values without the availability - * of backing memory. - */ -static inline s64 -ia64_pal_cache_line_init (u64 physical_addr, u64 data_value) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_CACHE_LINE_INIT, physical_addr, data_value, 0); - return iprv.status; -} - - -/* Read the data and tag of a processor controlled cache line for diags */ -static inline s64 -ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr) -{ - struct ia64_pal_retval iprv; - PAL_CALL_PHYS_STK(iprv, PAL_CACHE_READ, line_id.pclid_data, - physical_addr, 0); - return iprv.status; -} - -/* Return summary information about the hierarchy of caches controlled by the processor */ -static inline long ia64_pal_cache_summary(unsigned long *cache_levels, - unsigned long *unique_caches) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_CACHE_SUMMARY, 0, 0, 0); - if (cache_levels) - *cache_levels = iprv.v0; - if (unique_caches) - *unique_caches = iprv.v1; - return iprv.status; -} - -/* Write the data and tag of a processor-controlled cache line for diags */ -static inline s64 -ia64_pal_cache_write (pal_cache_line_id_u_t line_id, u64 physical_addr, u64 data) -{ - struct ia64_pal_retval iprv; - PAL_CALL_PHYS_STK(iprv, PAL_CACHE_WRITE, line_id.pclid_data, - physical_addr, data); - return iprv.status; -} - - -/* Return the parameters needed 
to copy relocatable PAL procedures from ROM to memory */ -static inline s64 -ia64_pal_copy_info (u64 copy_type, u64 num_procs, u64 num_iopics, - u64 *buffer_size, u64 *buffer_align) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_COPY_INFO, copy_type, num_procs, num_iopics); - if (buffer_size) - *buffer_size = iprv.v0; - if (buffer_align) - *buffer_align = iprv.v1; - return iprv.status; -} - -/* Copy relocatable PAL procedures from ROM to memory */ -static inline s64 -ia64_pal_copy_pal (u64 target_addr, u64 alloc_size, u64 processor, u64 *pal_proc_offset) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_COPY_PAL, target_addr, alloc_size, processor); - if (pal_proc_offset) - *pal_proc_offset = iprv.v0; - return iprv.status; -} - -/* Return the number of instruction and data debug register pairs */ -static inline long ia64_pal_debug_info(unsigned long *inst_regs, - unsigned long *data_regs) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_DEBUG_INFO, 0, 0, 0); - if (inst_regs) - *inst_regs = iprv.v0; - if (data_regs) - *data_regs = iprv.v1; - - return iprv.status; -} - -#ifdef TBD -/* Switch from IA64-system environment to IA-32 system environment */ -static inline s64 -ia64_pal_enter_ia32_env (ia32_env1, ia32_env2, ia32_env3) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_ENTER_IA_32_ENV, ia32_env1, ia32_env2, ia32_env3); - return iprv.status; -} -#endif - -/* Get unique geographical address of this processor on its bus */ -static inline s64 -ia64_pal_fixed_addr (u64 *global_unique_addr) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_FIXED_ADDR, 0, 0, 0); - if (global_unique_addr) - *global_unique_addr = iprv.v0; - return iprv.status; -} - -/* Get base frequency of the platform if generated by the processor */ -static inline long ia64_pal_freq_base(unsigned long *platform_base_freq) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_FREQ_BASE, 0, 0, 0); - if (platform_base_freq) - *platform_base_freq = iprv.v0; - return 
iprv.status; -} - -/* - * Get the ratios for processor frequency, bus frequency and interval timer to - * the base frequency of the platform - */ -static inline s64 -ia64_pal_freq_ratios (struct pal_freq_ratio *proc_ratio, struct pal_freq_ratio *bus_ratio, - struct pal_freq_ratio *itc_ratio) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_FREQ_RATIOS, 0, 0, 0); - if (proc_ratio) - *(u64 *)proc_ratio = iprv.v0; - if (bus_ratio) - *(u64 *)bus_ratio = iprv.v1; - if (itc_ratio) - *(u64 *)itc_ratio = iprv.v2; - return iprv.status; -} - -/* - * Get the current hardware resource sharing policy of the processor - */ -static inline s64 -ia64_pal_get_hw_policy (u64 proc_num, u64 *cur_policy, u64 *num_impacted, - u64 *la) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_GET_HW_POLICY, proc_num, 0, 0); - if (cur_policy) - *cur_policy = iprv.v0; - if (num_impacted) - *num_impacted = iprv.v1; - if (la) - *la = iprv.v2; - return iprv.status; -} - -/* Make the processor enter HALT or one of the implementation dependent low - * power states where prefetching and execution are suspended and cache and - * TLB coherency is not maintained. - */ -static inline s64 -ia64_pal_halt (u64 halt_state) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_HALT, halt_state, 0, 0); - return iprv.status; -} - -typedef union pal_power_mgmt_info_u { - u64 ppmi_data; - struct { - u64 exit_latency : 16, - entry_latency : 16, - power_consumption : 28, - im : 1, - co : 1, - reserved : 2; - } pal_power_mgmt_info_s; -} pal_power_mgmt_info_u_t; - -/* Return information about processor's optional power management capabilities. 
*/ -static inline s64 -ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf) -{ - struct ia64_pal_retval iprv; - PAL_CALL_STK(iprv, PAL_HALT_INFO, (unsigned long) power_buf, 0, 0); - return iprv.status; -} - -/* Get the current P-state information */ -static inline s64 -ia64_pal_get_pstate (u64 *pstate_index, unsigned long type) -{ - struct ia64_pal_retval iprv; - PAL_CALL_STK(iprv, PAL_GET_PSTATE, type, 0, 0); - *pstate_index = iprv.v0; - return iprv.status; -} - -/* Set the P-state */ -static inline s64 -ia64_pal_set_pstate (u64 pstate_index) -{ - struct ia64_pal_retval iprv; - PAL_CALL_STK(iprv, PAL_SET_PSTATE, pstate_index, 0, 0); - return iprv.status; -} - -/* Processor branding information*/ -static inline s64 -ia64_pal_get_brand_info (char *brand_info) -{ - struct ia64_pal_retval iprv; - PAL_CALL_STK(iprv, PAL_BRAND_INFO, 0, (u64)brand_info, 0); - return iprv.status; -} - -/* Cause the processor to enter LIGHT HALT state, where prefetching and execution are - * suspended, but cache and TLB coherency is maintained. - */ -static inline s64 -ia64_pal_halt_light (void) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_HALT_LIGHT, 0, 0, 0); - return iprv.status; -} - -/* Clear all the processor error logging registers and reset the indicator that allows - * the error logging registers to be written. This procedure also checks the pending - * machine check bit and pending INIT bit and reports their states. - */ -static inline s64 -ia64_pal_mc_clear_log (u64 *pending_vector) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MC_CLEAR_LOG, 0, 0, 0); - if (pending_vector) - *pending_vector = iprv.v0; - return iprv.status; -} - -/* Ensure that all outstanding transactions in a processor are completed or that any - * MCA due to thes outstanding transaction is taken. 
- */ -static inline s64 -ia64_pal_mc_drain (void) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MC_DRAIN, 0, 0, 0); - return iprv.status; -} - -/* Return the machine check dynamic processor state */ -static inline s64 -ia64_pal_mc_dynamic_state (u64 info_type, u64 dy_buffer, u64 *size) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, info_type, dy_buffer, 0); - if (size) - *size = iprv.v0; - return iprv.status; -} - -/* Return processor machine check information */ -static inline s64 -ia64_pal_mc_error_info (u64 info_index, u64 type_index, u64 *size, u64 *error_info) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MC_ERROR_INFO, info_index, type_index, 0); - if (size) - *size = iprv.v0; - if (error_info) - *error_info = iprv.v1; - return iprv.status; -} - -/* Injects the requested processor error or returns info on - * supported injection capabilities for current processor implementation - */ -static inline s64 -ia64_pal_mc_error_inject_phys (u64 err_type_info, u64 err_struct_info, - u64 err_data_buffer, u64 *capabilities, u64 *resources) -{ - struct ia64_pal_retval iprv; - PAL_CALL_PHYS_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info, - err_struct_info, err_data_buffer); - if (capabilities) - *capabilities= iprv.v0; - if (resources) - *resources= iprv.v1; - return iprv.status; -} - -static inline s64 -ia64_pal_mc_error_inject_virt (u64 err_type_info, u64 err_struct_info, - u64 err_data_buffer, u64 *capabilities, u64 *resources) -{ - struct ia64_pal_retval iprv; - PAL_CALL_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info, - err_struct_info, err_data_buffer); - if (capabilities) - *capabilities= iprv.v0; - if (resources) - *resources= iprv.v1; - return iprv.status; -} - -/* Inform PALE_CHECK whether a machine check is expected so that PALE_CHECK willnot - * attempt to correct any expected machine checks. 
- */ -static inline s64 -ia64_pal_mc_expected (u64 expected, u64 *previous) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MC_EXPECTED, expected, 0, 0); - if (previous) - *previous = iprv.v0; - return iprv.status; -} - -typedef union pal_hw_tracking_u { - u64 pht_data; - struct { - u64 itc :4, /* Instruction cache tracking */ - dct :4, /* Date cache tracking */ - itt :4, /* Instruction TLB tracking */ - ddt :4, /* Data TLB tracking */ - reserved:48; - } pal_hw_tracking_s; -} pal_hw_tracking_u_t; - -/* - * Hardware tracking status. - */ -static inline s64 -ia64_pal_mc_hw_tracking (u64 *status) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MC_HW_TRACKING, 0, 0, 0); - if (status) - *status = iprv.v0; - return iprv.status; -} - -/* Register a platform dependent location with PAL to which it can save - * minimal processor state in the event of a machine check or initialization - * event. - */ -static inline s64 -ia64_pal_mc_register_mem (u64 physical_addr, u64 size, u64 *req_size) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, size, 0); - if (req_size) - *req_size = iprv.v0; - return iprv.status; -} - -/* Restore minimal architectural processor state, set CMC interrupt if necessary - * and resume execution - */ -static inline s64 -ia64_pal_mc_resume (u64 set_cmci, u64 save_ptr) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MC_RESUME, set_cmci, save_ptr, 0); - return iprv.status; -} - -/* Return the memory attributes implemented by the processor */ -static inline s64 -ia64_pal_mem_attrib (u64 *mem_attrib) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MEM_ATTRIB, 0, 0, 0); - if (mem_attrib) - *mem_attrib = iprv.v0 & 0xff; - return iprv.status; -} - -/* Return the amount of memory needed for second phase of processor - * self-test and the required alignment of memory. 
- */ -static inline s64 -ia64_pal_mem_for_test (u64 *bytes_needed, u64 *alignment) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_MEM_FOR_TEST, 0, 0, 0); - if (bytes_needed) - *bytes_needed = iprv.v0; - if (alignment) - *alignment = iprv.v1; - return iprv.status; -} - -typedef union pal_perf_mon_info_u { - u64 ppmi_data; - struct { - u64 generic : 8, - width : 8, - cycles : 8, - retired : 8, - reserved : 32; - } pal_perf_mon_info_s; -} pal_perf_mon_info_u_t; - -/* Return the performance monitor information about what can be counted - * and how to configure the monitors to count the desired events. - */ -static inline s64 -ia64_pal_perf_mon_info (u64 *pm_buffer, pal_perf_mon_info_u_t *pm_info) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_PERF_MON_INFO, (unsigned long) pm_buffer, 0, 0); - if (pm_info) - pm_info->ppmi_data = iprv.v0; - return iprv.status; -} - -/* Specifies the physical address of the processor interrupt block - * and I/O port space. - */ -static inline s64 -ia64_pal_platform_addr (u64 type, u64 physical_addr) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_PLATFORM_ADDR, type, physical_addr, 0); - return iprv.status; -} - -/* Set the SAL PMI entrypoint in memory */ -static inline s64 -ia64_pal_pmi_entrypoint (u64 sal_pmi_entry_addr) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_PMI_ENTRYPOINT, sal_pmi_entry_addr, 0, 0); - return iprv.status; -} - -struct pal_features_s; -/* Provide information about configurable processor features */ -static inline s64 -ia64_pal_proc_get_features (u64 *features_avail, - u64 *features_status, - u64 *features_control, - u64 features_set) -{ - struct ia64_pal_retval iprv; - PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, features_set, 0); - if (iprv.status == 0) { - *features_avail = iprv.v0; - *features_status = iprv.v1; - *features_control = iprv.v2; - } - return iprv.status; -} - -/* Enable/disable processor dependent features */ -static inline s64 -ia64_pal_proc_set_features (u64 
feature_select) -{ - struct ia64_pal_retval iprv; - PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, feature_select, 0, 0); - return iprv.status; -} - -/* - * Put everything in a struct so we avoid the global offset table whenever - * possible. - */ -typedef struct ia64_ptce_info_s { - unsigned long base; - u32 count[2]; - u32 stride[2]; -} ia64_ptce_info_t; - -/* Return the information required for the architected loop used to purge - * (initialize) the entire TC - */ -static inline s64 -ia64_get_ptce (ia64_ptce_info_t *ptce) -{ - struct ia64_pal_retval iprv; - - if (!ptce) - return -1; - - PAL_CALL(iprv, PAL_PTCE_INFO, 0, 0, 0); - if (iprv.status == 0) { - ptce->base = iprv.v0; - ptce->count[0] = iprv.v1 >> 32; - ptce->count[1] = iprv.v1 & 0xffffffff; - ptce->stride[0] = iprv.v2 >> 32; - ptce->stride[1] = iprv.v2 & 0xffffffff; - } - return iprv.status; -} - -/* Return info about implemented application and control registers. */ -static inline s64 -ia64_pal_register_info (u64 info_request, u64 *reg_info_1, u64 *reg_info_2) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_REGISTER_INFO, info_request, 0, 0); - if (reg_info_1) - *reg_info_1 = iprv.v0; - if (reg_info_2) - *reg_info_2 = iprv.v1; - return iprv.status; -} - -typedef union pal_hints_u { - unsigned long ph_data; - struct { - unsigned long si : 1, - li : 1, - reserved : 62; - } pal_hints_s; -} pal_hints_u_t; - -/* Return information about the register stack and RSE for this processor - * implementation. 
- */ -static inline long ia64_pal_rse_info(unsigned long *num_phys_stacked, - pal_hints_u_t *hints) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_RSE_INFO, 0, 0, 0); - if (num_phys_stacked) - *num_phys_stacked = iprv.v0; - if (hints) - hints->ph_data = iprv.v1; - return iprv.status; -} - -/* - * Set the current hardware resource sharing policy of the processor - */ -static inline s64 -ia64_pal_set_hw_policy (u64 policy) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_SET_HW_POLICY, policy, 0, 0); - return iprv.status; -} - -/* Cause the processor to enter SHUTDOWN state, where prefetching and execution are - * suspended, but cause cache and TLB coherency to be maintained. - * This is usually called in IA-32 mode. - */ -static inline s64 -ia64_pal_shutdown (void) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_SHUTDOWN, 0, 0, 0); - return iprv.status; -} - -/* Perform the second phase of processor self-test. */ -static inline s64 -ia64_pal_test_proc (u64 test_addr, u64 test_size, u64 attributes, u64 *self_test_state) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_TEST_PROC, test_addr, test_size, attributes); - if (self_test_state) - *self_test_state = iprv.v0; - return iprv.status; -} - -typedef union pal_version_u { - u64 pal_version_val; - struct { - u64 pv_pal_b_rev : 8; - u64 pv_pal_b_model : 8; - u64 pv_reserved1 : 8; - u64 pv_pal_vendor : 8; - u64 pv_pal_a_rev : 8; - u64 pv_pal_a_model : 8; - u64 pv_reserved2 : 16; - } pal_version_s; -} pal_version_u_t; - - -/* - * Return PAL version information. While the documentation states that - * PAL_VERSION can be called in either physical or virtual mode, some - * implementations only allow physical calls. We don't call it very often, - * so the overhead isn't worth eliminating. 
- */ -static inline s64 -ia64_pal_version (pal_version_u_t *pal_min_version, pal_version_u_t *pal_cur_version) -{ - struct ia64_pal_retval iprv; - PAL_CALL_PHYS(iprv, PAL_VERSION, 0, 0, 0); - if (pal_min_version) - pal_min_version->pal_version_val = iprv.v0; - - if (pal_cur_version) - pal_cur_version->pal_version_val = iprv.v1; - - return iprv.status; -} - -typedef union pal_tc_info_u { - u64 pti_val; - struct { - u64 num_sets : 8, - associativity : 8, - num_entries : 16, - pf : 1, - unified : 1, - reduce_tr : 1, - reserved : 29; - } pal_tc_info_s; -} pal_tc_info_u_t; - -#define tc_reduce_tr pal_tc_info_s.reduce_tr -#define tc_unified pal_tc_info_s.unified -#define tc_pf pal_tc_info_s.pf -#define tc_num_entries pal_tc_info_s.num_entries -#define tc_associativity pal_tc_info_s.associativity -#define tc_num_sets pal_tc_info_s.num_sets - - -/* Return information about the virtual memory characteristics of the processor - * implementation. - */ -static inline s64 -ia64_pal_vm_info (u64 tc_level, u64 tc_type, pal_tc_info_u_t *tc_info, u64 *tc_pages) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_VM_INFO, tc_level, tc_type, 0); - if (tc_info) - tc_info->pti_val = iprv.v0; - if (tc_pages) - *tc_pages = iprv.v1; - return iprv.status; -} - -/* Get page size information about the virtual memory characteristics of the processor - * implementation. 
- */ -static inline s64 ia64_pal_vm_page_size(u64 *tr_pages, u64 *vw_pages) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_VM_PAGE_SIZE, 0, 0, 0); - if (tr_pages) - *tr_pages = iprv.v0; - if (vw_pages) - *vw_pages = iprv.v1; - return iprv.status; -} - -typedef union pal_vm_info_1_u { - u64 pvi1_val; - struct { - u64 vw : 1, - phys_add_size : 7, - key_size : 8, - max_pkr : 8, - hash_tag_id : 8, - max_dtr_entry : 8, - max_itr_entry : 8, - max_unique_tcs : 8, - num_tc_levels : 8; - } pal_vm_info_1_s; -} pal_vm_info_1_u_t; - -#define PAL_MAX_PURGES 0xFFFF /* all ones is means unlimited */ - -typedef union pal_vm_info_2_u { - u64 pvi2_val; - struct { - u64 impl_va_msb : 8, - rid_size : 8, - max_purges : 16, - reserved : 32; - } pal_vm_info_2_s; -} pal_vm_info_2_u_t; - -/* Get summary information about the virtual memory characteristics of the processor - * implementation. - */ -static inline s64 -ia64_pal_vm_summary (pal_vm_info_1_u_t *vm_info_1, pal_vm_info_2_u_t *vm_info_2) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_VM_SUMMARY, 0, 0, 0); - if (vm_info_1) - vm_info_1->pvi1_val = iprv.v0; - if (vm_info_2) - vm_info_2->pvi2_val = iprv.v1; - return iprv.status; -} - -typedef union pal_vp_info_u { - u64 pvi_val; - struct { - u64 index: 48, /* virtual feature set info */ - vmm_id: 16; /* feature set id */ - } pal_vp_info_s; -} pal_vp_info_u_t; - -/* - * Returns information about virtual processor features - */ -static inline s64 -ia64_pal_vp_info (u64 feature_set, u64 vp_buffer, u64 *vp_info, u64 *vmm_id) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_VP_INFO, feature_set, vp_buffer, 0); - if (vp_info) - *vp_info = iprv.v0; - if (vmm_id) - *vmm_id = iprv.v1; - return iprv.status; -} - -typedef union pal_itr_valid_u { - u64 piv_val; - struct { - u64 access_rights_valid : 1, - priv_level_valid : 1, - dirty_bit_valid : 1, - mem_attr_valid : 1, - reserved : 60; - } pal_tr_valid_s; -} pal_tr_valid_u_t; - -/* Read a translation register */ -static 
inline s64 -ia64_pal_tr_read (u64 reg_num, u64 tr_type, u64 *tr_buffer, pal_tr_valid_u_t *tr_valid) -{ - struct ia64_pal_retval iprv; - PAL_CALL_PHYS_STK(iprv, PAL_VM_TR_READ, reg_num, tr_type,(u64)ia64_tpa(tr_buffer)); - if (tr_valid) - tr_valid->piv_val = iprv.v0; - return iprv.status; -} - -/* - * PAL_PREFETCH_VISIBILITY transaction types - */ -#define PAL_VISIBILITY_VIRTUAL 0 -#define PAL_VISIBILITY_PHYSICAL 1 - -/* - * PAL_PREFETCH_VISIBILITY return codes - */ -#define PAL_VISIBILITY_OK 1 -#define PAL_VISIBILITY_OK_REMOTE_NEEDED 0 -#define PAL_VISIBILITY_INVAL_ARG -2 -#define PAL_VISIBILITY_ERROR -3 - -static inline s64 -ia64_pal_prefetch_visibility (s64 trans_type) -{ - struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_PREFETCH_VISIBILITY, trans_type, 0, 0); - return iprv.status; -} - -/* data structure for getting information on logical to physical mappings */ -typedef union pal_log_overview_u { - struct { - u64 num_log :16, /* Total number of logical - * processors on this die - */ - tpc :8, /* Threads per core */ - reserved3 :8, /* Reserved */ - cpp :8, /* Cores per processor */ - reserved2 :8, /* Reserved */ - ppid :8, /* Physical processor ID */ - reserved1 :8; /* Reserved */ - } overview_bits; - u64 overview_data; -} pal_log_overview_t; - -typedef union pal_proc_n_log_info1_u{ - struct { - u64 tid :16, /* Thread id */ - reserved2 :16, /* Reserved */ - cid :16, /* Core id */ - reserved1 :16; /* Reserved */ - } ppli1_bits; - u64 ppli1_data; -} pal_proc_n_log_info1_t; - -typedef union pal_proc_n_log_info2_u { - struct { - u64 la :16, /* Logical address */ - reserved :48; /* Reserved */ - } ppli2_bits; - u64 ppli2_data; -} pal_proc_n_log_info2_t; - -typedef struct pal_logical_to_physical_s -{ - pal_log_overview_t overview; - pal_proc_n_log_info1_t ppli1; - pal_proc_n_log_info2_t ppli2; -} pal_logical_to_physical_t; - -#define overview_num_log overview.overview_bits.num_log -#define overview_tpc overview.overview_bits.tpc -#define overview_cpp 
overview.overview_bits.cpp -#define overview_ppid overview.overview_bits.ppid -#define log1_tid ppli1.ppli1_bits.tid -#define log1_cid ppli1.ppli1_bits.cid -#define log2_la ppli2.ppli2_bits.la - -/* Get information on logical to physical processor mappings. */ -static inline s64 -ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping) -{ - struct ia64_pal_retval iprv; - - PAL_CALL(iprv, PAL_LOGICAL_TO_PHYSICAL, proc_number, 0, 0); - - if (iprv.status == PAL_STATUS_SUCCESS) - { - mapping->overview.overview_data = iprv.v0; - mapping->ppli1.ppli1_data = iprv.v1; - mapping->ppli2.ppli2_data = iprv.v2; - } - - return iprv.status; -} - -typedef struct pal_cache_shared_info_s -{ - u64 num_shared; - pal_proc_n_log_info1_t ppli1; - pal_proc_n_log_info2_t ppli2; -} pal_cache_shared_info_t; - -/* Get information on logical to physical processor mappings. */ -static inline s64 -ia64_pal_cache_shared_info(u64 level, - u64 type, - u64 proc_number, - pal_cache_shared_info_t *info) -{ - struct ia64_pal_retval iprv; - - PAL_CALL(iprv, PAL_CACHE_SHARED_INFO, level, type, proc_number); - - if (iprv.status == PAL_STATUS_SUCCESS) { - info->num_shared = iprv.v0; - info->ppli1.ppli1_data = iprv.v1; - info->ppli2.ppli2_data = iprv.v2; - } - - return iprv.status; -} -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_IA64_PAL_H */ diff --git a/arch/ia64/include/asm/param.h b/arch/ia64/include/asm/param.h deleted file mode 100644 index f0b786227c40..000000000000 --- a/arch/ia64/include/asm/param.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Fundamental kernel parameters. - * - * Based on . 
- * - * Modified 1998, 1999, 2002-2003 - * David Mosberger-Tang , Hewlett-Packard Co - */ -#ifndef _ASM_IA64_PARAM_H -#define _ASM_IA64_PARAM_H - -#include - -# define HZ CONFIG_HZ -# define USER_HZ HZ -# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ -#endif /* _ASM_IA64_PARAM_H */ diff --git a/arch/ia64/include/asm/parport.h b/arch/ia64/include/asm/parport.h deleted file mode 100644 index 360ca9bf2f6f..000000000000 --- a/arch/ia64/include/asm/parport.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * parport.h: platform-specific PC-style parport initialisation - * - * Copyright (C) 1999, 2000 Tim Waugh - * - * This file should only be included by drivers/parport/parport_pc.c. - */ - -#ifndef _ASM_IA64_PARPORT_H -#define _ASM_IA64_PARPORT_H 1 - -static int parport_pc_find_isa_ports(int autoirq, int autodma); - -static int parport_pc_find_nonpci_ports(int autoirq, int autodma) -{ - return parport_pc_find_isa_ports(autoirq, autodma); -} - -#endif /* _ASM_IA64_PARPORT_H */ diff --git a/arch/ia64/include/asm/patch.h b/arch/ia64/include/asm/patch.h deleted file mode 100644 index bd487ed22bf5..000000000000 --- a/arch/ia64/include/asm/patch.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_PATCH_H -#define _ASM_IA64_PATCH_H - -/* - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * There are a number of reasons for patching instructions. Rather than duplicating code - * all over the place, we put the common stuff here. Reasons for patching: in-kernel - * module-loader, virtual-to-physical patch-list, McKinley Errata 9 workaround, and gate - * shared library. Undoubtedly, some of these reasons will disappear and others will - * be added over time. - */ -#include -#include - -extern void ia64_patch (u64 insn_addr, u64 mask, u64 val); /* patch any insn slot */ -extern void ia64_patch_imm64 (u64 insn_addr, u64 val); /* patch "movl" w/abs. 
value*/ -extern void ia64_patch_imm60 (u64 insn_addr, u64 val); /* patch "brl" w/ip-rel value */ - -extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end); -extern void ia64_patch_vtop (unsigned long start, unsigned long end); -extern void ia64_patch_phys_stack_reg(unsigned long val); -extern void ia64_patch_rse (unsigned long start, unsigned long end); -extern void ia64_patch_gate (void); - -#endif /* _ASM_IA64_PATCH_H */ diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h deleted file mode 100644 index fa8f545c24c9..000000000000 --- a/arch/ia64/include/asm/pci.h +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_PCI_H -#define _ASM_IA64_PCI_H - -#include -#include -#include -#include -#include -#include - -#include -#include - -struct pci_vector_struct { - __u16 segment; /* PCI Segment number */ - __u16 bus; /* PCI Bus number */ - __u32 pci_id; /* ACPI split 16 bits device, 16 bits function (see section 6.1.1) */ - __u8 pin; /* PCI PIN (0 = A, 1 = B, 2 = C, 3 = D) */ - __u32 irq; /* IRQ assigned */ -}; - -/* - * Can be used to override the logic in pci_scan_bus for skipping already-configured bus - * numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the - * loader. 
- */ -#define pcibios_assign_all_busses() 0 - -#define PCIBIOS_MIN_IO 0x1000 -#define PCIBIOS_MIN_MEM 0x10000000 - -#define HAVE_PCI_MMAP -#define ARCH_GENERIC_PCI_MMAP_RESOURCE -#define arch_can_pci_mmap_wc() 1 - -#define HAVE_PCI_LEGACY -extern int pci_mmap_legacy_page_range(struct pci_bus *bus, - struct vm_area_struct *vma, - enum pci_mmap_state mmap_state); - -char *pci_get_legacy_mem(struct pci_bus *bus); -int pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size); -int pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size); - -struct pci_controller { - struct acpi_device *companion; - void *iommu; - int segment; - int node; /* nearest node with memory or NUMA_NO_NODE for global allocation */ - - void *platform_data; -}; - - -#define PCI_CONTROLLER(busdev) ((struct pci_controller *) busdev->sysdata) -#define pci_domain_nr(busdev) (PCI_CONTROLLER(busdev)->segment) - -extern struct pci_ops pci_root_ops; - -static inline int pci_proc_domain(struct pci_bus *bus) -{ - return (pci_domain_nr(bus) != 0); -} - -#endif /* _ASM_IA64_PCI_H */ diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h deleted file mode 100644 index f357b9bb3576..000000000000 --- a/arch/ia64/include/asm/percpu.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_PERCPU_H -#define _ASM_IA64_PERCPU_H - -/* - * Copyright (C) 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#ifdef __ASSEMBLY__ -# define THIS_CPU(var) (var) /* use this to mark accesses to per-CPU variables... */ -#else /* !__ASSEMBLY__ */ - - -#include - -#ifdef CONFIG_SMP - -#ifdef HAVE_MODEL_SMALL_ATTRIBUTE -# define PER_CPU_ATTRIBUTES __attribute__((__model__ (__small__))) -#endif - -#define __my_cpu_offset __ia64_per_cpu_var(local_per_cpu_offset) - -extern void *per_cpu_init(void); - -#else /* ! 
SMP */ - -#define per_cpu_init() (__phys_per_cpu_start) - -#endif /* SMP */ - -#define PER_CPU_BASE_SECTION ".data..percpu" - -/* - * Be extremely careful when taking the address of this variable! Due to virtual - * remapping, it is different from the canonical address returned by this_cpu_ptr(&var)! - * On the positive side, using __ia64_per_cpu_var() instead of this_cpu_ptr() is slightly - * more efficient. - */ -#define __ia64_per_cpu_var(var) (*({ \ - __verify_pcpu_ptr(&(var)); \ - ((typeof(var) __kernel __force *)&(var)); \ -})) - -#include - -/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */ -DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); - -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_IA64_PERCPU_H */ diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h deleted file mode 100644 index 0fb2b6291d58..000000000000 --- a/arch/ia64/include/asm/pgalloc.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_PGALLOC_H -#define _ASM_IA64_PGALLOC_H - -/* - * This file contains the functions and defines necessary to allocate - * page tables. - * - * This hopefully works with any (fixed) ia-64 page-size, as defined - * in (currently 8192). 
- * - * Copyright (C) 1998-2001 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 2000, Goutham Rao - */ - - -#include -#include -#include -#include - -#include - -#include - -static inline pgd_t *pgd_alloc(struct mm_struct *mm) -{ - return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); -} - -#if CONFIG_PGTABLE_LEVELS == 4 -static inline void -p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t * pud) -{ - p4d_val(*p4d_entry) = __pa(pud); -} - -#define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud) -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ - -static inline void -pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd) -{ - pud_val(*pud_entry) = __pa(pmd); -} - -#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd) - -static inline void -pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte) -{ - pmd_val(*pmd_entry) = page_to_phys(pte); -} - -static inline void -pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) -{ - pmd_val(*pmd_entry) = __pa(pte); -} - -#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) - -#endif /* _ASM_IA64_PGALLOC_H */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h deleted file mode 100644 index 9be2d2ba6016..000000000000 --- a/arch/ia64/include/asm/pgtable.h +++ /dev/null @@ -1,545 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_PGTABLE_H -#define _ASM_IA64_PGTABLE_H - -/* - * This file contains the functions and defines necessary to modify and use - * the IA-64 page table tree. - * - * This hopefully works with any (fixed) IA-64 page-size, as defined - * in . - * - * Copyright (C) 1998-2005 Hewlett-Packard Co - * David Mosberger-Tang - */ - - -#include -#include -#include -#include - -#define IA64_MAX_PHYS_BITS 50 /* max. number of physical address bits (architected) */ - -/* - * First, define the various bits in a PTE. 
Note that the PTE format - * matches the VHPT short format, the firt doubleword of the VHPD long - * format, and the first doubleword of the TLB insertion format. - */ -#define _PAGE_P_BIT 0 -#define _PAGE_A_BIT 5 -#define _PAGE_D_BIT 6 - -#define _PAGE_P (1 << _PAGE_P_BIT) /* page present bit */ -#define _PAGE_MA_WB (0x0 << 2) /* write back memory attribute */ -#define _PAGE_MA_UC (0x4 << 2) /* uncacheable memory attribute */ -#define _PAGE_MA_UCE (0x5 << 2) /* UC exported attribute */ -#define _PAGE_MA_WC (0x6 << 2) /* write coalescing memory attribute */ -#define _PAGE_MA_NAT (0x7 << 2) /* not-a-thing attribute */ -#define _PAGE_MA_MASK (0x7 << 2) -#define _PAGE_PL_0 (0 << 7) /* privilege level 0 (kernel) */ -#define _PAGE_PL_1 (1 << 7) /* privilege level 1 (unused) */ -#define _PAGE_PL_2 (2 << 7) /* privilege level 2 (unused) */ -#define _PAGE_PL_3 (3 << 7) /* privilege level 3 (user) */ -#define _PAGE_PL_MASK (3 << 7) -#define _PAGE_AR_R (0 << 9) /* read only */ -#define _PAGE_AR_RX (1 << 9) /* read & execute */ -#define _PAGE_AR_RW (2 << 9) /* read & write */ -#define _PAGE_AR_RWX (3 << 9) /* read, write & execute */ -#define _PAGE_AR_R_RW (4 << 9) /* read / read & write */ -#define _PAGE_AR_RX_RWX (5 << 9) /* read & exec / read, write & exec */ -#define _PAGE_AR_RWX_RW (6 << 9) /* read, write & exec / read & write */ -#define _PAGE_AR_X_RX (7 << 9) /* exec & promote / read & exec */ -#define _PAGE_AR_MASK (7 << 9) -#define _PAGE_AR_SHIFT 9 -#define _PAGE_A (1 << _PAGE_A_BIT) /* page accessed bit */ -#define _PAGE_D (1 << _PAGE_D_BIT) /* page dirty bit */ -#define _PAGE_PPN_MASK (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL) -#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */ -#define _PAGE_PROTNONE (__IA64_UL(1) << 63) - -/* We borrow bit 7 to store the exclusive marker in swap PTEs. 
*/ -#define _PAGE_SWP_EXCLUSIVE (1 << 7) - -#define _PFN_MASK _PAGE_PPN_MASK -/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */ -#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED) - -#define _PAGE_SIZE_4K 12 -#define _PAGE_SIZE_8K 13 -#define _PAGE_SIZE_16K 14 -#define _PAGE_SIZE_64K 16 -#define _PAGE_SIZE_256K 18 -#define _PAGE_SIZE_1M 20 -#define _PAGE_SIZE_4M 22 -#define _PAGE_SIZE_16M 24 -#define _PAGE_SIZE_64M 26 -#define _PAGE_SIZE_256M 28 -#define _PAGE_SIZE_1G 30 -#define _PAGE_SIZE_4G 32 - -#define __ACCESS_BITS _PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB -#define __DIRTY_BITS_NO_ED _PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB -#define __DIRTY_BITS _PAGE_ED | __DIRTY_BITS_NO_ED - -/* - * How many pointers will a page table level hold expressed in shift - */ -#define PTRS_PER_PTD_SHIFT (PAGE_SHIFT-3) - -/* - * Definitions for fourth level: - */ -#define PTRS_PER_PTE (__IA64_UL(1) << (PTRS_PER_PTD_SHIFT)) - -/* - * Definitions for third level: - * - * PMD_SHIFT determines the size of the area a third-level page table - * can map. - */ -#define PMD_SHIFT (PAGE_SHIFT + (PTRS_PER_PTD_SHIFT)) -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) -#define PTRS_PER_PMD (1UL << (PTRS_PER_PTD_SHIFT)) - -#if CONFIG_PGTABLE_LEVELS == 4 -/* - * Definitions for second level: - * - * PUD_SHIFT determines the size of the area a second-level page table - * can map. - */ -#define PUD_SHIFT (PMD_SHIFT + (PTRS_PER_PTD_SHIFT)) -#define PUD_SIZE (1UL << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE-1)) -#define PTRS_PER_PUD (1UL << (PTRS_PER_PTD_SHIFT)) -#endif - -/* - * Definitions for first level: - * - * PGDIR_SHIFT determines what a first-level page table entry can map. 
- */ -#if CONFIG_PGTABLE_LEVELS == 4 -#define PGDIR_SHIFT (PUD_SHIFT + (PTRS_PER_PTD_SHIFT)) -#else -#define PGDIR_SHIFT (PMD_SHIFT + (PTRS_PER_PTD_SHIFT)) -#endif -#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD_SHIFT PTRS_PER_PTD_SHIFT -#define PTRS_PER_PGD (1UL << PTRS_PER_PGD_SHIFT) -#define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */ - -/* - * All the normal masks have the "page accessed" bits on, as any time - * they are used, the page is accessed. They are cleared only by the - * page-out routines. - */ -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_A) -#define PAGE_SHARED __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) -#define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) -#define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) -#define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) -#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) -#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) -#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) -#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \ - _PAGE_MA_UC) - -# ifndef __ASSEMBLY__ - -#include /* for mm_struct */ -#include -#include -#include - -/* - * Next come the mappings that determine how mmap() protection bits - * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented. The - * _P version gets used for a private shared memory segment, the _S - * version gets used for a shared memory segment with MAP_SHARED on. - * In a private shared memory segment, we do a copy-on-write if a task - * attempts to write to the page. 
- */ - /* xwr */ -#define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) -#if CONFIG_PGTABLE_LEVELS == 4 -#define pud_ERROR(e) printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) -#endif -#define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) -#define pte_ERROR(e) printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) - - -/* - * Some definitions to translate between mem_map, PTEs, and page addresses: - */ - - -/* Quick test to see if ADDR is a (potentially) valid physical address. */ -static inline long -ia64_phys_addr_valid (unsigned long addr) -{ - return (addr & (local_cpu_data->unimpl_pa_mask)) == 0; -} - -/* - * Now come the defines and routines to manage and access the three-level - * page table. - */ - - -#define VMALLOC_START (RGN_BASE(RGN_GATE) + 0x200000000UL) -#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_SPARSEMEM_VMEMMAP) -/* SPARSEMEM_VMEMMAP uses half of vmalloc... */ -# define VMALLOC_END (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 10))) -# define vmemmap ((struct page *)VMALLOC_END) -#else -# define VMALLOC_END (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9))) -#endif - -/* fs/proc/kcore.c */ -#define kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE)) -#define kc_offset_to_vaddr(o) ((o) + RGN_BASE(RGN_GATE)) - -#define RGN_MAP_SHIFT (PGDIR_SHIFT + PTRS_PER_PGD_SHIFT - 3) -#define RGN_MAP_LIMIT ((1UL << RGN_MAP_SHIFT) - PAGE_SIZE) /* per region addr limit */ - -#define PFN_PTE_SHIFT PAGE_SHIFT -/* - * Conversion functions: convert page frame number (pfn) and a protection value to a page - * table entry (pte). - */ -#define pfn_pte(pfn, pgprot) \ -({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; }) - -/* Extract pfn from pte. 
*/ -#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT) - -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) - -/* This takes a physical page address that is used by the remapping functions */ -#define mk_pte_phys(physpage, pgprot) \ -({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; }) - -#define pte_modify(_pte, newprot) \ - (__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK))) - -#define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE)) -#define pte_clear(mm,addr,pte) (pte_val(*(pte)) = 0UL) -/* pte_page() returns the "struct page *" corresponding to the PTE: */ -#define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET)) - -#define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd))) -#define pmd_present(pmd) (pmd_val(pmd) != 0UL) -#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) -#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) -#define pmd_pfn(pmd) ((pmd_val(pmd) & _PFN_MASK) >> PAGE_SHIFT) -#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET)) - -#define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud))) -#define pud_present(pud) (pud_val(pud) != 0UL) -#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & _PFN_MASK)) -#define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET)) - -#if CONFIG_PGTABLE_LEVELS == 4 -#define p4d_none(p4d) (!p4d_val(p4d)) -#define p4d_bad(p4d) (!ia64_phys_addr_valid(p4d_val(p4d))) -#define p4d_present(p4d) (p4d_val(p4d) != 0UL) -#define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL) -#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & _PFN_MASK)) -#define p4d_page(p4d) virt_to_page((p4d_val(p4d) + PAGE_OFFSET)) -#endif - -/* - * The following have defined behavior only work if pte_present() is true. 
- */ -#define pte_write(pte) ((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4) -#define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) -#define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) -#define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) - -/* - * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the - * access rights: - */ -#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW)) -#define pte_mkwrite_novma(pte) (__pte(pte_val(pte) | _PAGE_AR_RW)) -#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A)) -#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) -#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) -#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) -#define pte_mkhuge(pte) (__pte(pte_val(pte))) - -/* - * Because ia64's Icache and Dcache is not coherent (on a cpu), we need to - * sync icache and dcache when we insert *new* executable page. - * __ia64_sync_icache_dcache() check Pg_arch_1 bit and flush icache - * if necessary. - * - * set_pte() is also called by the kernel, but we can expect that the kernel - * flushes icache explicitly if necessary. - */ -#define pte_present_exec_user(pte)\ - ((pte_val(pte) & (_PAGE_P | _PAGE_PL_MASK | _PAGE_AR_RX)) == \ - (_PAGE_P | _PAGE_PL_3 | _PAGE_AR_RX)) - -extern void __ia64_sync_icache_dcache(pte_t pteval); -static inline void set_pte(pte_t *ptep, pte_t pteval) -{ - /* page is present && page is user && page is executable - * && (page swapin or new page or page migration - * || copy_on_write with page copying.) - */ - if (pte_present_exec_user(pteval) && - (!pte_present(*ptep) || - pte_pfn(*ptep) != pte_pfn(pteval))) - /* load_module() calles flush_icache_range() explicitly*/ - __ia64_sync_icache_dcache(pteval); - *ptep = pteval; -} - -/* - * Make page protection values cacheable, uncacheable, or write- - * combining. 
Note that "protection" is really a misnomer here as the - * protection value contains the memory attribute bits, dirty bits, and - * various other bits as well. - */ -#define pgprot_cacheable(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WB) -#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) -#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC) - -struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -#define __HAVE_PHYS_MEM_ACCESS_PROT - -static inline unsigned long -pgd_index (unsigned long address) -{ - unsigned long region = address >> 61; - unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1); - - return (region << (PAGE_SHIFT - 6)) | l1index; -} -#define pgd_index pgd_index - -/* - * In the kernel's mapped region we know everything is in region number 5, so - * as an optimisation its PGD already points to the area for that region. - * However, this also means that we cannot use pgd_index() and we must - * never add the region here. - */ -#define pgd_offset_k(addr) \ - (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) - -/* Look up a pgd entry in the gate area. On IA-64, the gate-area - resides in the kernel-mapped segment, hence we use pgd_offset_k() - here. 
*/ -#define pgd_offset_gate(mm, addr) pgd_offset_k(addr) - -/* atomic versions of the some PTE manipulations: */ - -static inline int -ptep_test_and_clear_young (struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ -#ifdef CONFIG_SMP - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_A_BIT, ptep); -#else - pte_t pte = *ptep; - if (!pte_young(pte)) - return 0; - set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte)); - return 1; -#endif -} - -static inline pte_t -ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ -#ifdef CONFIG_SMP - return __pte(xchg((long *) ptep, 0)); -#else - pte_t pte = *ptep; - pte_clear(mm, addr, ptep); - return pte; -#endif -} - -static inline void -ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ -#ifdef CONFIG_SMP - unsigned long new, old; - - do { - old = pte_val(*ptep); - new = pte_val(pte_wrprotect(__pte (old))); - } while (cmpxchg((unsigned long *) ptep, old, new) != old); -#else - pte_t old_pte = *ptep; - set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); -#endif -} - -static inline int -pte_same (pte_t a, pte_t b) -{ - return pte_val(a) == pte_val(b); -} - -#define update_mmu_cache_range(vmf, vma, address, ptep, nr) do { } while (0) -#define update_mmu_cache(vma, address, ptep) do { } while (0) - -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern void paging_init (void); - -/* - * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that - * are !pte_none() && !pte_present(). - * - * Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of - * bits in the swap-type field of the swap pte. It would be nice to - * enforce that, but we can't easily include here. - * (Of course, better still would be to define MAX_SWAPFILES_SHIFT here...). 
- * - * Format of swap pte: - * bit 0 : present bit (must be zero) - * bits 1- 6: swap type - * bit 7 : exclusive marker - * bits 8-62: swap offset - * bit 63 : _PAGE_PROTNONE bit - */ -#define __swp_type(entry) (((entry).val >> 1) & 0x3f) -#define __swp_offset(entry) (((entry).val << 1) >> 9) -#define __swp_entry(type, offset) ((swp_entry_t) { ((type & 0x3f) << 1) | \ - ((long) (offset) << 8) }) -#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) - -static inline int pte_swp_exclusive(pte_t pte) -{ - return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; -} - -static inline pte_t pte_swp_mkexclusive(pte_t pte) -{ - pte_val(pte) |= _PAGE_SWP_EXCLUSIVE; - return pte; -} - -static inline pte_t pte_swp_clear_exclusive(pte_t pte) -{ - pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE; - return pte; -} - -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; -extern struct page *zero_page_memmap_ptr; -#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr) - -/* We provide our own get_unmapped_area to cope with VA holes for userland */ -#define HAVE_ARCH_UNMAPPED_AREA - -#ifdef CONFIG_HUGETLB_PAGE -#define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3)) -#define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT) -#define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1)) -#endif - - -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -/* - * Update PTEP with ENTRY, which is guaranteed to be a less - * restrictive PTE. That is, ENTRY may have the ACCESSED, DIRTY, and - * WRITABLE bits turned on, when the value at PTEP did not. The - * WRITABLE bit may only be turned if SAFELY_WRITABLE is TRUE. - * - * SAFELY_WRITABLE is TRUE if we can update the value at PTEP without - * having to worry about races. 
On SMP machines, there are only two - * cases where this is true: - * - * (1) *PTEP has the PRESENT bit turned OFF - * (2) ENTRY has the DIRTY bit turned ON - * - * On ia64, we could implement this routine with a cmpxchg()-loop - * which ORs in the _PAGE_A/_PAGE_D bit if they're set in ENTRY. - * However, like on x86, we can get a more streamlined version by - * observing that it is OK to drop ACCESSED bit updates when - * SAFELY_WRITABLE is FALSE. Besides being rare, all that would do is - * result in an extra Access-bit fault, which would then turn on the - * ACCESSED bit in the low-level fault handler (iaccess_bit or - * daccess_bit in ivt.S). - */ -#ifdef CONFIG_SMP -# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ -({ \ - int __changed = !pte_same(*(__ptep), __entry); \ - if (__changed && __safely_writable) { \ - set_pte(__ptep, __entry); \ - flush_tlb_page(__vma, __addr); \ - } \ - __changed; \ -}) -#else -# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ -({ \ - int __changed = !pte_same(*(__ptep), __entry); \ - if (__changed) { \ - set_pte_at((__vma)->vm_mm, (__addr), __ptep, __entry); \ - flush_tlb_page(__vma, __addr); \ - } \ - __changed; \ -}) -#endif -# endif /* !__ASSEMBLY__ */ - -/* - * Identity-mapped regions use a large page size. We'll call such large pages - * "granules". If you can think of a better name that's unambiguous, let me - * know... - */ -#if defined(CONFIG_IA64_GRANULE_64MB) -# define IA64_GRANULE_SHIFT _PAGE_SIZE_64M -#elif defined(CONFIG_IA64_GRANULE_16MB) -# define IA64_GRANULE_SHIFT _PAGE_SIZE_16M -#endif -#define IA64_GRANULE_SIZE (1 << IA64_GRANULE_SHIFT) -/* - * log2() of the page size we use to map the kernel image (IA64_TR_KERNEL): - */ -#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M -#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT) - -/* These tell get_user_pages() that the first gate page is accessible from user-level. 
*/ -#define FIXADDR_USER_START GATE_ADDR -#ifdef HAVE_BUGGY_SEGREL -# define FIXADDR_USER_END (GATE_ADDR + 2*PAGE_SIZE) -#else -# define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE) -#endif - -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define __HAVE_ARCH_PTE_SAME -#define __HAVE_ARCH_PGD_OFFSET_GATE - - -#if CONFIG_PGTABLE_LEVELS == 3 -#include -#endif -#include - -#endif /* _ASM_IA64_PGTABLE_H */ diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h deleted file mode 100644 index 47e3801b526a..000000000000 --- a/arch/ia64/include/asm/processor.h +++ /dev/null @@ -1,660 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_PROCESSOR_H -#define _ASM_IA64_PROCESSOR_H - -/* - * Copyright (C) 1998-2004 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - * - * 11/24/98 S.Eranian added ia64_set_iva() - * 12/03/99 D. Mosberger implement thread_saved_pc() via kernel unwind API - * 06/16/00 A. Mallick added csd/ssd/tssd for ia32 support - */ - - -#include -#include -#include -#include - -#define IA64_NUM_PHYS_STACK_REG 96 -#define IA64_NUM_DBG_REGS 8 - -#define DEFAULT_MAP_BASE __IA64_UL_CONST(0x2000000000000000) -#define DEFAULT_TASK_SIZE __IA64_UL_CONST(0xa000000000000000) - -/* - * TASK_SIZE really is a mis-named. It really is the maximum user - * space address (plus one). On IA-64, there are five regions of 2TB - * each (assuming 8KB page size), for a total of 8TB of user virtual - * address space. - */ -#define TASK_SIZE DEFAULT_TASK_SIZE - -/* - * This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_BASE (current->thread.map_base) - -#define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? 
*/ -#define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */ -#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ -#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ -#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ -#define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration - sync at ctx sw */ -#define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */ -#define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */ - -#define IA64_THREAD_UAC_SHIFT 3 -#define IA64_THREAD_UAC_MASK (IA64_THREAD_UAC_NOPRINT | IA64_THREAD_UAC_SIGBUS) -#define IA64_THREAD_FPEMU_SHIFT 6 -#define IA64_THREAD_FPEMU_MASK (IA64_THREAD_FPEMU_NOPRINT | IA64_THREAD_FPEMU_SIGFPE) - - -/* - * This shift should be large enough to be able to represent 1000000000/itc_freq with good - * accuracy while being small enough to fit 10*1000000000< -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_NUMA -#include -#endif - -/* like above but expressed as bitfields for more efficient access: */ -struct ia64_psr { - __u64 reserved0 : 1; - __u64 be : 1; - __u64 up : 1; - __u64 ac : 1; - __u64 mfl : 1; - __u64 mfh : 1; - __u64 reserved1 : 7; - __u64 ic : 1; - __u64 i : 1; - __u64 pk : 1; - __u64 reserved2 : 1; - __u64 dt : 1; - __u64 dfl : 1; - __u64 dfh : 1; - __u64 sp : 1; - __u64 pp : 1; - __u64 di : 1; - __u64 si : 1; - __u64 db : 1; - __u64 lp : 1; - __u64 tb : 1; - __u64 rt : 1; - __u64 reserved3 : 4; - __u64 cpl : 2; - __u64 is : 1; - __u64 mc : 1; - __u64 it : 1; - __u64 id : 1; - __u64 da : 1; - __u64 dd : 1; - __u64 ss : 1; - __u64 ri : 2; - __u64 ed : 1; - __u64 bn : 1; - __u64 reserved4 : 19; -}; - -union ia64_isr { - __u64 val; - struct { - __u64 code : 16; - __u64 vector : 8; - __u64 reserved1 : 8; - __u64 x : 1; - __u64 w : 1; - __u64 r 
: 1; - __u64 na : 1; - __u64 sp : 1; - __u64 rs : 1; - __u64 ir : 1; - __u64 ni : 1; - __u64 so : 1; - __u64 ei : 2; - __u64 ed : 1; - __u64 reserved2 : 20; - }; -}; - -union ia64_lid { - __u64 val; - struct { - __u64 rv : 16; - __u64 eid : 8; - __u64 id : 8; - __u64 ig : 32; - }; -}; - -union ia64_tpr { - __u64 val; - struct { - __u64 ig0 : 4; - __u64 mic : 4; - __u64 rsv : 8; - __u64 mmi : 1; - __u64 ig1 : 47; - }; -}; - -union ia64_itir { - __u64 val; - struct { - __u64 rv3 : 2; /* 0-1 */ - __u64 ps : 6; /* 2-7 */ - __u64 key : 24; /* 8-31 */ - __u64 rv4 : 32; /* 32-63 */ - }; -}; - -union ia64_rr { - __u64 val; - struct { - __u64 ve : 1; /* enable hw walker */ - __u64 reserved0: 1; /* reserved */ - __u64 ps : 6; /* log page size */ - __u64 rid : 24; /* region id */ - __u64 reserved1: 32; /* reserved */ - }; -}; - -/* - * CPU type, hardware bug flags, and per-CPU state. Frequently used - * state comes earlier: - */ -struct cpuinfo_ia64 { - unsigned int softirq_pending; - unsigned long itm_delta; /* # of clock cycles between clock ticks */ - unsigned long itm_next; /* interval timer mask value to use for next clock tick */ - unsigned long nsec_per_cyc; /* (1000000000<thread.flags = (((task)->thread.flags & ~IA64_THREAD_UAC_MASK) \ - | (((value) << IA64_THREAD_UAC_SHIFT) & IA64_THREAD_UAC_MASK)); \ - 0; \ -}) -#define GET_UNALIGN_CTL(task,addr) \ -({ \ - put_user(((task)->thread.flags & IA64_THREAD_UAC_MASK) >> IA64_THREAD_UAC_SHIFT, \ - (int __user *) (addr)); \ -}) - -#define SET_FPEMU_CTL(task,value) \ -({ \ - (task)->thread.flags = (((task)->thread.flags & ~IA64_THREAD_FPEMU_MASK) \ - | (((value) << IA64_THREAD_FPEMU_SHIFT) & IA64_THREAD_FPEMU_MASK)); \ - 0; \ -}) -#define GET_FPEMU_CTL(task,addr) \ -({ \ - put_user(((task)->thread.flags & IA64_THREAD_FPEMU_MASK) >> IA64_THREAD_FPEMU_SHIFT, \ - (int __user *) (addr)); \ -}) - -struct thread_struct { - __u32 flags; /* various thread flags (see IA64_THREAD_*) */ - /* writing on_ustack is performance-critical, so 
it's worth spending 8 bits on it... */ - __u8 on_ustack; /* executing on user-stacks? */ - __u8 pad[3]; - __u64 ksp; /* kernel stack pointer */ - __u64 map_base; /* base address for get_unmapped_area() */ - __u64 rbs_bot; /* the base address for the RBS */ - int last_fph_cpu; /* CPU that may hold the contents of f32-f127 */ - unsigned long dbr[IA64_NUM_DBG_REGS]; - unsigned long ibr[IA64_NUM_DBG_REGS]; - struct ia64_fpreg fph[96]; /* saved/loaded on demand */ -}; - -#define INIT_THREAD { \ - .flags = 0, \ - .on_ustack = 0, \ - .ksp = 0, \ - .map_base = DEFAULT_MAP_BASE, \ - .rbs_bot = STACK_TOP - DEFAULT_USER_STACK_SIZE, \ - .last_fph_cpu = -1, \ - .dbr = {0, }, \ - .ibr = {0, }, \ - .fph = {{{{0}}}, } \ -} - -#define start_thread(regs,new_ip,new_sp) do { \ - regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL)) \ - & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS)); \ - regs->cr_iip = new_ip; \ - regs->ar_rsc = 0xf; /* eager mode, privilege level 3 */ \ - regs->ar_rnat = 0; \ - regs->ar_bspstore = current->thread.rbs_bot; \ - regs->ar_fpsr = FPSR_DEFAULT; \ - regs->loadrs = 0; \ - regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \ - regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ - if (unlikely(get_dumpable(current->mm) != SUID_DUMP_USER)) { \ - /* \ - * Zap scratch regs to avoid leaking bits between processes with different \ - * uid/privileges. \ - */ \ - regs->ar_pfs = 0; regs->b0 = 0; regs->pr = 0; \ - regs->r1 = 0; regs->r9 = 0; regs->r11 = 0; regs->r13 = 0; regs->r15 = 0; \ - } \ -} while (0) - -/* Forward declarations, a strange C thing... */ -struct mm_struct; -struct task_struct; - -/* Get wait channel for task P. */ -extern unsigned long __get_wchan (struct task_struct *p); - -/* Return instruction pointer of blocked task TSK. 
*/ -#define KSTK_EIP(tsk) \ - ({ \ - struct pt_regs *_regs = task_pt_regs(tsk); \ - _regs->cr_iip + ia64_psr(_regs)->ri; \ - }) - -/* Return stack pointer of blocked task TSK. */ -#define KSTK_ESP(tsk) ((tsk)->thread.ksp) - -extern void ia64_getreg_unknown_kr (void); -extern void ia64_setreg_unknown_kr (void); - -#define ia64_get_kr(regnum) \ -({ \ - unsigned long r = 0; \ - \ - switch (regnum) { \ - case 0: r = ia64_getreg(_IA64_REG_AR_KR0); break; \ - case 1: r = ia64_getreg(_IA64_REG_AR_KR1); break; \ - case 2: r = ia64_getreg(_IA64_REG_AR_KR2); break; \ - case 3: r = ia64_getreg(_IA64_REG_AR_KR3); break; \ - case 4: r = ia64_getreg(_IA64_REG_AR_KR4); break; \ - case 5: r = ia64_getreg(_IA64_REG_AR_KR5); break; \ - case 6: r = ia64_getreg(_IA64_REG_AR_KR6); break; \ - case 7: r = ia64_getreg(_IA64_REG_AR_KR7); break; \ - default: ia64_getreg_unknown_kr(); break; \ - } \ - r; \ -}) - -#define ia64_set_kr(regnum, r) \ -({ \ - switch (regnum) { \ - case 0: ia64_setreg(_IA64_REG_AR_KR0, r); break; \ - case 1: ia64_setreg(_IA64_REG_AR_KR1, r); break; \ - case 2: ia64_setreg(_IA64_REG_AR_KR2, r); break; \ - case 3: ia64_setreg(_IA64_REG_AR_KR3, r); break; \ - case 4: ia64_setreg(_IA64_REG_AR_KR4, r); break; \ - case 5: ia64_setreg(_IA64_REG_AR_KR5, r); break; \ - case 6: ia64_setreg(_IA64_REG_AR_KR6, r); break; \ - case 7: ia64_setreg(_IA64_REG_AR_KR7, r); break; \ - default: ia64_setreg_unknown_kr(); break; \ - } \ -}) - -/* - * The following three macros can't be inline functions because we don't have struct - * task_struct at this point. - */ - -/* - * Return TRUE if task T owns the fph partition of the CPU we're running on. - * Must be called from code that has preemption disabled. 
- */ -#define ia64_is_local_fpu_owner(t) \ -({ \ - struct task_struct *__ia64_islfo_task = (t); \ - (__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id() \ - && __ia64_islfo_task == (struct task_struct *) ia64_get_kr(IA64_KR_FPU_OWNER)); \ -}) - -/* - * Mark task T as owning the fph partition of the CPU we're running on. - * Must be called from code that has preemption disabled. - */ -#define ia64_set_local_fpu_owner(t) do { \ - struct task_struct *__ia64_slfo_task = (t); \ - __ia64_slfo_task->thread.last_fph_cpu = smp_processor_id(); \ - ia64_set_kr(IA64_KR_FPU_OWNER, (unsigned long) __ia64_slfo_task); \ -} while (0) - -/* Mark the fph partition of task T as being invalid on all CPUs. */ -#define ia64_drop_fpu(t) ((t)->thread.last_fph_cpu = -1) - -extern void __ia64_init_fpu (void); -extern void __ia64_save_fpu (struct ia64_fpreg *fph); -extern void __ia64_load_fpu (struct ia64_fpreg *fph); -extern void ia64_save_debug_regs (unsigned long *save_area); -extern void ia64_load_debug_regs (unsigned long *save_area); - -#define ia64_fph_enable() do { ia64_rsm(IA64_PSR_DFH); ia64_srlz_d(); } while (0) -#define ia64_fph_disable() do { ia64_ssm(IA64_PSR_DFH); ia64_srlz_d(); } while (0) - -/* load fp 0.0 into fph */ -static inline void -ia64_init_fpu (void) { - ia64_fph_enable(); - __ia64_init_fpu(); - ia64_fph_disable(); -} - -/* save f32-f127 at FPH */ -static inline void -ia64_save_fpu (struct ia64_fpreg *fph) { - ia64_fph_enable(); - __ia64_save_fpu(fph); - ia64_fph_disable(); -} - -/* load f32-f127 from FPH */ -static inline void -ia64_load_fpu (struct ia64_fpreg *fph) { - ia64_fph_enable(); - __ia64_load_fpu(fph); - ia64_fph_disable(); -} - -static inline __u64 -ia64_clear_ic (void) -{ - __u64 psr; - psr = ia64_getreg(_IA64_REG_PSR); - ia64_stop(); - ia64_rsm(IA64_PSR_I | IA64_PSR_IC); - ia64_srlz_i(); - return psr; -} - -/* - * Restore the psr. 
- */ -static inline void -ia64_set_psr (__u64 psr) -{ - ia64_stop(); - ia64_setreg(_IA64_REG_PSR_L, psr); - ia64_srlz_i(); -} - -/* - * Insert a translation into an instruction and/or data translation - * register. - */ -static inline void -ia64_itr (__u64 target_mask, __u64 tr_num, - __u64 vmaddr, __u64 pte, - __u64 log_page_size) -{ - ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2)); - ia64_setreg(_IA64_REG_CR_IFA, vmaddr); - ia64_stop(); - if (target_mask & 0x1) - ia64_itri(tr_num, pte); - if (target_mask & 0x2) - ia64_itrd(tr_num, pte); -} - -/* - * Insert a translation into the instruction and/or data translation - * cache. - */ -static inline void -ia64_itc (__u64 target_mask, __u64 vmaddr, __u64 pte, - __u64 log_page_size) -{ - ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2)); - ia64_setreg(_IA64_REG_CR_IFA, vmaddr); - ia64_stop(); - /* as per EAS2.6, itc must be the last instruction in an instruction group */ - if (target_mask & 0x1) - ia64_itci(pte); - if (target_mask & 0x2) - ia64_itcd(pte); -} - -/* - * Purge a range of addresses from instruction and/or data translation - * register(s). - */ -static inline void -ia64_ptr (__u64 target_mask, __u64 vmaddr, __u64 log_size) -{ - if (target_mask & 0x1) - ia64_ptri(vmaddr, (log_size << 2)); - if (target_mask & 0x2) - ia64_ptrd(vmaddr, (log_size << 2)); -} - -/* Set the interrupt vector address. The address must be suitably aligned (32KB). */ -static inline void -ia64_set_iva (void *ivt_addr) -{ - ia64_setreg(_IA64_REG_CR_IVA, (__u64) ivt_addr); - ia64_srlz_i(); -} - -/* Set the page table address and control bits. 
*/ -static inline void -ia64_set_pta (__u64 pta) -{ - /* Note: srlz.i implies srlz.d */ - ia64_setreg(_IA64_REG_CR_PTA, pta); - ia64_srlz_i(); -} - -static inline void -ia64_eoi (void) -{ - ia64_setreg(_IA64_REG_CR_EOI, 0); - ia64_srlz_d(); -} - -#define cpu_relax() ia64_hint(ia64_hint_pause) - -static inline int -ia64_get_irr(unsigned int vector) -{ - unsigned int reg = vector / 64; - unsigned int bit = vector % 64; - unsigned long irr; - - switch (reg) { - case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break; - case 1: irr = ia64_getreg(_IA64_REG_CR_IRR1); break; - case 2: irr = ia64_getreg(_IA64_REG_CR_IRR2); break; - case 3: irr = ia64_getreg(_IA64_REG_CR_IRR3); break; - } - - return test_bit(bit, &irr); -} - -static inline void -ia64_set_lrr0 (unsigned long val) -{ - ia64_setreg(_IA64_REG_CR_LRR0, val); - ia64_srlz_d(); -} - -static inline void -ia64_set_lrr1 (unsigned long val) -{ - ia64_setreg(_IA64_REG_CR_LRR1, val); - ia64_srlz_d(); -} - - -/* - * Given the address to which a spill occurred, return the unat bit - * number that corresponds to this address. - */ -static inline __u64 -ia64_unat_pos (void *spill_addr) -{ - return ((__u64) spill_addr >> 3) & 0x3f; -} - -/* - * Set the NaT bit of an integer register which was spilled at address - * SPILL_ADDR. UNAT is the mask to be updated. 
- */ -static inline void -ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat) -{ - __u64 bit = ia64_unat_pos(spill_addr); - __u64 mask = 1UL << bit; - - *unat = (*unat & ~mask) | (nat << bit); -} - -static inline __u64 -ia64_get_ivr (void) -{ - __u64 r; - ia64_srlz_d(); - r = ia64_getreg(_IA64_REG_CR_IVR); - ia64_srlz_d(); - return r; -} - -static inline void -ia64_set_dbr (__u64 regnum, __u64 value) -{ - __ia64_set_dbr(regnum, value); -#ifdef CONFIG_ITANIUM - ia64_srlz_d(); -#endif -} - -static inline __u64 -ia64_get_dbr (__u64 regnum) -{ - __u64 retval; - - retval = __ia64_get_dbr(regnum); -#ifdef CONFIG_ITANIUM - ia64_srlz_d(); -#endif - return retval; -} - -static inline __u64 -ia64_rotr (__u64 w, __u64 n) -{ - return (w >> n) | (w << (64 - n)); -} - -#define ia64_rotl(w,n) ia64_rotr((w), (64) - (n)) - -/* - * Take a mapped kernel address and return the equivalent address - * in the region 7 identity mapped virtual area. - */ -static inline void * -ia64_imva (void *addr) -{ - void *result; - result = (void *) ia64_tpa(addr); - return __va(result); -} - -#define ARCH_HAS_PREFETCH -#define ARCH_HAS_PREFETCHW -#define PREFETCH_STRIDE L1_CACHE_BYTES - -static inline void -prefetch (const void *x) -{ - ia64_lfetch(ia64_lfhint_none, x); -} - -static inline void -prefetchw (const void *x) -{ - ia64_lfetch_excl(ia64_lfhint_none, x); -} - -extern unsigned long boot_option_idle_override; - -enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT, - IDLE_NOMWAIT, IDLE_POLL}; - -void default_idle(void); - -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_IA64_PROCESSOR_H */ diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h deleted file mode 100644 index 402874489890..000000000000 --- a/arch/ia64/include/asm/ptrace.h +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1998-2004 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 2003 Intel Co - * 
Suresh Siddha - * Fenghua Yu - * Arun Sharma - * - * 12/07/98 S. Eranian added pt_regs & switch_stack - * 12/21/98 D. Mosberger updated to match latest code - * 6/17/99 D. Mosberger added second unat member to "struct switch_stack" - * - */ -#ifndef _ASM_IA64_PTRACE_H -#define _ASM_IA64_PTRACE_H - -#ifndef ASM_OFFSETS_C -#include -#endif -#include - -/* - * Base-2 logarithm of number of pages to allocate per task structure - * (including register backing store and memory stack): - */ -#if defined(CONFIG_IA64_PAGE_SIZE_4KB) -# define KERNEL_STACK_SIZE_ORDER 3 -#elif defined(CONFIG_IA64_PAGE_SIZE_8KB) -# define KERNEL_STACK_SIZE_ORDER 2 -#elif defined(CONFIG_IA64_PAGE_SIZE_16KB) -# define KERNEL_STACK_SIZE_ORDER 1 -#else -# define KERNEL_STACK_SIZE_ORDER 0 -#endif - -#define IA64_RBS_OFFSET ((IA64_TASK_SIZE + IA64_THREAD_INFO_SIZE + 31) & ~31) -#define IA64_STK_OFFSET ((1 << KERNEL_STACK_SIZE_ORDER)*PAGE_SIZE) - -#define KERNEL_STACK_SIZE IA64_STK_OFFSET - -#ifndef __ASSEMBLY__ - -#include -#include - -/* - * We use the ia64_psr(regs)->ri to determine which of the three - * instructions in bundle (16 bytes) took the sample. Generate - * the canonical representation by adding to instruction pointer. - */ -# define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri) -# define instruction_pointer_set(regs, val) \ -({ \ - ia64_psr(regs)->ri = (val & 0xf); \ - regs->cr_iip = (val & ~0xfULL); \ -}) - -static inline unsigned long user_stack_pointer(struct pt_regs *regs) -{ - return regs->r12; -} - -static inline int is_syscall_success(struct pt_regs *regs) -{ - return regs->r10 != -1; -} - -static inline long regs_return_value(struct pt_regs *regs) -{ - if (is_syscall_success(regs)) - return regs->r8; - else - return -regs->r8; -} - -/* Conserve space in histogram by encoding slot bits in address - * bits 2 and 3 rather than bits 0 and 1. 
- */ -#define profile_pc(regs) \ -({ \ - unsigned long __ip = instruction_pointer(regs); \ - (__ip & ~3UL) + ((__ip & 3UL) << 2); \ -}) - - /* given a pointer to a task_struct, return the user's pt_regs */ -# define task_pt_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) -# define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr) -# define user_mode(regs) (((struct ia64_psr *) &(regs)->cr_ipsr)->cpl != 0) -# define user_stack(task,regs) ((long) regs - (long) task == IA64_STK_OFFSET - sizeof(*regs)) -# define fsys_mode(task,regs) \ - ({ \ - struct task_struct *_task = (task); \ - struct pt_regs *_regs = (regs); \ - !user_mode(_regs) && user_stack(_task, _regs); \ - }) - - /* - * System call handlers that, upon successful completion, need to return a negative value - * should call force_successful_syscall_return() right before returning. On architectures - * where the syscall convention provides for a separate error flag (e.g., alpha, ia64, - * ppc{,64}, sparc{,64}, possibly others), this macro can be used to ensure that the error - * flag will not get set. On architectures which do not support a separate error flag, - * the macro is a no-op and the spurious error condition needs to be filtered out by some - * other means (e.g., in user-level, by passing an extra argument to the syscall handler, - * or something along those lines). - * - * On ia64, we can clear the user's pt_regs->r8 to force a successful syscall. 
- */ -# define force_successful_syscall_return() (task_pt_regs(current)->r8 = 0) - - struct task_struct; /* forward decl */ - struct unw_frame_info; /* forward decl */ - - extern unsigned long ia64_get_user_rbs_end (struct task_struct *, struct pt_regs *, - unsigned long *); - extern long ia64_peek (struct task_struct *, struct switch_stack *, unsigned long, - unsigned long, long *); - extern long ia64_poke (struct task_struct *, struct switch_stack *, unsigned long, - unsigned long, long); - extern void ia64_flush_fph (struct task_struct *); - extern void ia64_sync_fph (struct task_struct *); - extern void ia64_sync_krbs(void); - extern long ia64_sync_user_rbs (struct task_struct *, struct switch_stack *, - unsigned long, unsigned long); - - /* get nat bits for scratch registers such that bit N==1 iff scratch register rN is a NaT */ - extern unsigned long ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat); - /* put nat bits for scratch registers such that scratch register rN is a NaT iff bit N==1 */ - extern unsigned long ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat); - - extern void ia64_increment_ip (struct pt_regs *pt); - extern void ia64_decrement_ip (struct pt_regs *pt); - - extern void ia64_ptrace_stop(void); - #define arch_ptrace_stop() \ - ia64_ptrace_stop() - #define arch_ptrace_stop_needed() \ - (!test_thread_flag(TIF_RESTORE_RSE)) - - #define arch_has_single_step() (1) - #define arch_has_block_step() (1) - -#endif /* !__ASSEMBLY__ */ -#endif /* _ASM_IA64_PTRACE_H */ diff --git a/arch/ia64/include/asm/sal.h b/arch/ia64/include/asm/sal.h deleted file mode 100644 index 22749a201e92..000000000000 --- a/arch/ia64/include/asm/sal.h +++ /dev/null @@ -1,919 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_SAL_H -#define _ASM_IA64_SAL_H - -/* - * System Abstraction Layer definitions. - * - * This is based on version 2.5 of the manual "IA-64 System - * Abstraction Layer". 
- * - * Copyright (C) 2001 Intel - * Copyright (C) 2002 Jenna Hall - * Copyright (C) 2001 Fred Lewis - * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999 Srinivasa Prasad Thirumalachar - * - * 02/01/04 J. Hall Updated Error Record Structures to conform to July 2001 - * revision of the SAL spec. - * 01/01/03 fvlewis Updated Error Record Structures to conform with Nov. 2000 - * revision of the SAL spec. - * 99/09/29 davidm Updated for SAL 2.6. - * 00/03/29 cfleck Updated SAL Error Logging info for processor (SAL 2.6) - * (plus examples of platform error info structures from smariset @ Intel) - */ - -#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK_BIT 0 -#define IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT_BIT 1 -#define IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT_BIT 2 -#define IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT 3 - -#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK (1< -#include -#include - -#include -#include - -extern unsigned long sal_systab_phys; -extern spinlock_t sal_lock; - -/* SAL spec _requires_ eight args for each call. */ -#define __IA64_FW_CALL(entry,result,a0,a1,a2,a3,a4,a5,a6,a7) \ - result = (*entry)(a0,a1,a2,a3,a4,a5,a6,a7) - -# define IA64_FW_CALL(entry,result,args...) do { \ - unsigned long __ia64_sc_flags; \ - struct ia64_fpreg __ia64_sc_fr[6]; \ - ia64_save_scratch_fpregs(__ia64_sc_fr); \ - spin_lock_irqsave(&sal_lock, __ia64_sc_flags); \ - __IA64_FW_CALL(entry, result, args); \ - spin_unlock_irqrestore(&sal_lock, __ia64_sc_flags); \ - ia64_load_scratch_fpregs(__ia64_sc_fr); \ -} while (0) - -# define SAL_CALL(result,args...) \ - IA64_FW_CALL(ia64_sal, result, args); - -# define SAL_CALL_NOLOCK(result,args...) 
do { \ - unsigned long __ia64_scn_flags; \ - struct ia64_fpreg __ia64_scn_fr[6]; \ - ia64_save_scratch_fpregs(__ia64_scn_fr); \ - local_irq_save(__ia64_scn_flags); \ - __IA64_FW_CALL(ia64_sal, result, args); \ - local_irq_restore(__ia64_scn_flags); \ - ia64_load_scratch_fpregs(__ia64_scn_fr); \ -} while (0) - -# define SAL_CALL_REENTRANT(result,args...) do { \ - struct ia64_fpreg __ia64_scs_fr[6]; \ - ia64_save_scratch_fpregs(__ia64_scs_fr); \ - preempt_disable(); \ - __IA64_FW_CALL(ia64_sal, result, args); \ - preempt_enable(); \ - ia64_load_scratch_fpregs(__ia64_scs_fr); \ -} while (0) - -#define SAL_SET_VECTORS 0x01000000 -#define SAL_GET_STATE_INFO 0x01000001 -#define SAL_GET_STATE_INFO_SIZE 0x01000002 -#define SAL_CLEAR_STATE_INFO 0x01000003 -#define SAL_MC_RENDEZ 0x01000004 -#define SAL_MC_SET_PARAMS 0x01000005 -#define SAL_REGISTER_PHYSICAL_ADDR 0x01000006 - -#define SAL_CACHE_FLUSH 0x01000008 -#define SAL_CACHE_INIT 0x01000009 -#define SAL_PCI_CONFIG_READ 0x01000010 -#define SAL_PCI_CONFIG_WRITE 0x01000011 -#define SAL_FREQ_BASE 0x01000012 -#define SAL_PHYSICAL_ID_INFO 0x01000013 - -#define SAL_UPDATE_PAL 0x01000020 - -struct ia64_sal_retval { - /* - * A zero status value indicates call completed without error. - * A negative status value indicates reason of call failure. - * A positive status value indicates success but an - * informational value should be printed (e.g., "reboot for - * change to take effect"). - */ - long status; - unsigned long v0; - unsigned long v1; - unsigned long v2; -}; - -typedef struct ia64_sal_retval (*ia64_sal_handler) (u64, ...); - -enum { - SAL_FREQ_BASE_PLATFORM = 0, - SAL_FREQ_BASE_INTERVAL_TIMER = 1, - SAL_FREQ_BASE_REALTIME_CLOCK = 2 -}; - -/* - * The SAL system table is followed by a variable number of variable - * length descriptors. The structure of these descriptors follows - * below. 
- * The defininition follows SAL specs from July 2000 - */ -struct ia64_sal_systab { - u8 signature[4]; /* should be "SST_" */ - u32 size; /* size of this table in bytes */ - u8 sal_rev_minor; - u8 sal_rev_major; - u16 entry_count; /* # of entries in variable portion */ - u8 checksum; - u8 reserved1[7]; - u8 sal_a_rev_minor; - u8 sal_a_rev_major; - u8 sal_b_rev_minor; - u8 sal_b_rev_major; - /* oem_id & product_id: terminating NUL is missing if string is exactly 32 bytes long. */ - u8 oem_id[32]; - u8 product_id[32]; /* ASCII product id */ - u8 reserved2[8]; -}; - -enum sal_systab_entry_type { - SAL_DESC_ENTRY_POINT = 0, - SAL_DESC_MEMORY = 1, - SAL_DESC_PLATFORM_FEATURE = 2, - SAL_DESC_TR = 3, - SAL_DESC_PTC = 4, - SAL_DESC_AP_WAKEUP = 5 -}; - -/* - * Entry type: Size: - * 0 48 - * 1 32 - * 2 16 - * 3 32 - * 4 16 - * 5 16 - */ -#define SAL_DESC_SIZE(type) "\060\040\020\040\020\020"[(unsigned) type] - -typedef struct ia64_sal_desc_entry_point { - u8 type; - u8 reserved1[7]; - u64 pal_proc; - u64 sal_proc; - u64 gp; - u8 reserved2[16]; -}ia64_sal_desc_entry_point_t; - -typedef struct ia64_sal_desc_memory { - u8 type; - u8 used_by_sal; /* needs to be mapped for SAL? 
*/ - u8 mem_attr; /* current memory attribute setting */ - u8 access_rights; /* access rights set up by SAL */ - u8 mem_attr_mask; /* mask of supported memory attributes */ - u8 reserved1; - u8 mem_type; /* memory type */ - u8 mem_usage; /* memory usage */ - u64 addr; /* physical address of memory */ - u32 length; /* length (multiple of 4KB pages) */ - u32 reserved2; - u8 oem_reserved[8]; -} ia64_sal_desc_memory_t; - -typedef struct ia64_sal_desc_platform_feature { - u8 type; - u8 feature_mask; - u8 reserved1[14]; -} ia64_sal_desc_platform_feature_t; - -typedef struct ia64_sal_desc_tr { - u8 type; - u8 tr_type; /* 0 == instruction, 1 == data */ - u8 regnum; /* translation register number */ - u8 reserved1[5]; - u64 addr; /* virtual address of area covered */ - u64 page_size; /* encoded page size */ - u8 reserved2[8]; -} ia64_sal_desc_tr_t; - -typedef struct ia64_sal_desc_ptc { - u8 type; - u8 reserved1[3]; - u32 num_domains; /* # of coherence domains */ - u64 domain_info; /* physical address of domain info table */ -} ia64_sal_desc_ptc_t; - -typedef struct ia64_sal_ptc_domain_info { - u64 proc_count; /* number of processors in domain */ - u64 proc_list; /* physical address of LID array */ -} ia64_sal_ptc_domain_info_t; - -typedef struct ia64_sal_ptc_domain_proc_entry { - u64 id : 8; /* id of processor */ - u64 eid : 8; /* eid of processor */ -} ia64_sal_ptc_domain_proc_entry_t; - - -#define IA64_SAL_AP_EXTERNAL_INT 0 - -typedef struct ia64_sal_desc_ap_wakeup { - u8 type; - u8 mechanism; /* 0 == external interrupt */ - u8 reserved1[6]; - u64 vector; /* interrupt vector in range 0x10-0xff */ -} ia64_sal_desc_ap_wakeup_t ; - -extern ia64_sal_handler ia64_sal; -extern struct ia64_sal_desc_ptc *ia64_ptc_domain_info; - -extern unsigned short sal_revision; /* supported SAL spec revision */ -extern unsigned short sal_version; /* SAL version; OEM dependent */ -#define SAL_VERSION_CODE(major, minor) ((bin2bcd(major) << 8) | bin2bcd(minor)) - -extern const char 
*ia64_sal_strerror (long status); -extern void ia64_sal_init (struct ia64_sal_systab *sal_systab); - -/* SAL information type encodings */ -enum { - SAL_INFO_TYPE_MCA = 0, /* Machine check abort information */ - SAL_INFO_TYPE_INIT = 1, /* Init information */ - SAL_INFO_TYPE_CMC = 2, /* Corrected machine check information */ - SAL_INFO_TYPE_CPE = 3 /* Corrected platform error information */ -}; - -/* Encodings for machine check parameter types */ -enum { - SAL_MC_PARAM_RENDEZ_INT = 1, /* Rendezvous interrupt */ - SAL_MC_PARAM_RENDEZ_WAKEUP = 2, /* Wakeup */ - SAL_MC_PARAM_CPE_INT = 3 /* Corrected Platform Error Int */ -}; - -/* Encodings for rendezvous mechanisms */ -enum { - SAL_MC_PARAM_MECHANISM_INT = 1, /* Use interrupt */ - SAL_MC_PARAM_MECHANISM_MEM = 2 /* Use memory synchronization variable*/ -}; - -/* Encodings for vectors which can be registered by the OS with SAL */ -enum { - SAL_VECTOR_OS_MCA = 0, - SAL_VECTOR_OS_INIT = 1, - SAL_VECTOR_OS_BOOT_RENDEZ = 2 -}; - -/* Encodings for mca_opt parameter sent to SAL_MC_SET_PARAMS */ -#define SAL_MC_PARAM_RZ_ALWAYS 0x1 -#define SAL_MC_PARAM_BINIT_ESCALATE 0x10 - -/* - * Definition of the SAL Error Log from the SAL spec - */ - -/* SAL Error Record Section GUID Definitions */ -#define SAL_PROC_DEV_ERR_SECT_GUID \ - EFI_GUID(0xe429faf1, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) -#define SAL_PLAT_MEM_DEV_ERR_SECT_GUID \ - EFI_GUID(0xe429faf2, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) -#define SAL_PLAT_SEL_DEV_ERR_SECT_GUID \ - EFI_GUID(0xe429faf3, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) -#define SAL_PLAT_PCI_BUS_ERR_SECT_GUID \ - EFI_GUID(0xe429faf4, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) -#define SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID \ - EFI_GUID(0xe429faf5, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) -#define SAL_PLAT_PCI_COMP_ERR_SECT_GUID \ - EFI_GUID(0xe429faf6, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 
0x88, 0x81) -#define SAL_PLAT_SPECIFIC_ERR_SECT_GUID \ - EFI_GUID(0xe429faf7, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) -#define SAL_PLAT_HOST_CTLR_ERR_SECT_GUID \ - EFI_GUID(0xe429faf8, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) -#define SAL_PLAT_BUS_ERR_SECT_GUID \ - EFI_GUID(0xe429faf9, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) -#define PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID \ - EFI_GUID(0x6cb0a200, 0x893a, 0x11da, 0x96, 0xd2, 0x0, 0x10, 0x83, 0xff, \ - 0xca, 0x4d) - -#define MAX_CACHE_ERRORS 6 -#define MAX_TLB_ERRORS 6 -#define MAX_BUS_ERRORS 1 - -/* Definition of version according to SAL spec for logging purposes */ -typedef struct sal_log_revision { - u8 minor; /* BCD (0..99) */ - u8 major; /* BCD (0..99) */ -} sal_log_revision_t; - -/* Definition of timestamp according to SAL spec for logging purposes */ -typedef struct sal_log_timestamp { - u8 slh_second; /* Second (0..59) */ - u8 slh_minute; /* Minute (0..59) */ - u8 slh_hour; /* Hour (0..23) */ - u8 slh_reserved; - u8 slh_day; /* Day (1..31) */ - u8 slh_month; /* Month (1..12) */ - u8 slh_year; /* Year (00..99) */ - u8 slh_century; /* Century (19, 20, 21, ...) */ -} sal_log_timestamp_t; - -/* Definition of log record header structures */ -typedef struct sal_log_record_header { - u64 id; /* Unique monotonically increasing ID */ - sal_log_revision_t revision; /* Major and Minor revision of header */ - u8 severity; /* Error Severity */ - u8 validation_bits; /* 0: platform_guid, 1: !timestamp */ - u32 len; /* Length of this error log in bytes */ - sal_log_timestamp_t timestamp; /* Timestamp */ - efi_guid_t platform_guid; /* Unique OEM Platform ID */ -} sal_log_record_header_t; - -#define sal_log_severity_recoverable 0 -#define sal_log_severity_fatal 1 -#define sal_log_severity_corrected 2 - -/* - * Error Recovery Info (ERI) bit decode. From SAL Spec section B.2.2 Table B-3 - * Error Section Error_Recovery_Info Field Definition. 
- */ -#define ERI_NOT_VALID 0x0 /* Error Recovery Field is not valid */ -#define ERI_NOT_ACCESSIBLE 0x30 /* Resource not accessible */ -#define ERI_CONTAINMENT_WARN 0x22 /* Corrupt data propagated */ -#define ERI_UNCORRECTED_ERROR 0x20 /* Uncorrected error */ -#define ERI_COMPONENT_RESET 0x24 /* Component must be reset */ -#define ERI_CORR_ERROR_LOG 0x21 /* Corrected error, needs logging */ -#define ERI_CORR_ERROR_THRESH 0x29 /* Corrected error threshold exceeded */ - -/* Definition of log section header structures */ -typedef struct sal_log_sec_header { - efi_guid_t guid; /* Unique Section ID */ - sal_log_revision_t revision; /* Major and Minor revision of Section */ - u8 error_recovery_info; /* Platform error recovery status */ - u8 reserved; - u32 len; /* Section length */ -} sal_log_section_hdr_t; - -typedef struct sal_log_mod_error_info { - struct { - u64 check_info : 1, - requestor_identifier : 1, - responder_identifier : 1, - target_identifier : 1, - precise_ip : 1, - reserved : 59; - } valid; - u64 check_info; - u64 requestor_identifier; - u64 responder_identifier; - u64 target_identifier; - u64 precise_ip; -} sal_log_mod_error_info_t; - -typedef struct sal_processor_static_info { - struct { - u64 minstate : 1, - br : 1, - cr : 1, - ar : 1, - rr : 1, - fr : 1, - reserved : 58; - } valid; - struct pal_min_state_area min_state_area; - u64 br[8]; - u64 cr[128]; - u64 ar[128]; - u64 rr[8]; - struct ia64_fpreg __attribute__ ((packed)) fr[128]; -} sal_processor_static_info_t; - -struct sal_cpuid_info { - u64 regs[5]; - u64 reserved; -}; - -typedef struct sal_log_processor_info { - sal_log_section_hdr_t header; - struct { - u64 proc_error_map : 1, - proc_state_param : 1, - proc_cr_lid : 1, - psi_static_struct : 1, - num_cache_check : 4, - num_tlb_check : 4, - num_bus_check : 4, - num_reg_file_check : 4, - num_ms_check : 4, - cpuid_info : 1, - reserved1 : 39; - } valid; - u64 proc_error_map; - u64 proc_state_parameter; - u64 proc_cr_lid; - /* - * The rest of this 
structure consists of variable-length arrays, which can't be - * expressed in C. - */ - sal_log_mod_error_info_t info[]; - /* - * This is what the rest looked like if C supported variable-length arrays: - * - * sal_log_mod_error_info_t cache_check_info[.valid.num_cache_check]; - * sal_log_mod_error_info_t tlb_check_info[.valid.num_tlb_check]; - * sal_log_mod_error_info_t bus_check_info[.valid.num_bus_check]; - * sal_log_mod_error_info_t reg_file_check_info[.valid.num_reg_file_check]; - * sal_log_mod_error_info_t ms_check_info[.valid.num_ms_check]; - * struct sal_cpuid_info cpuid_info; - * sal_processor_static_info_t processor_static_info; - */ -} sal_log_processor_info_t; - -/* Given a sal_log_processor_info_t pointer, return a pointer to the processor_static_info: */ -#define SAL_LPI_PSI_INFO(l) \ -({ sal_log_processor_info_t *_l = (l); \ - ((sal_processor_static_info_t *) \ - ((char *) _l->info + ((_l->valid.num_cache_check + _l->valid.num_tlb_check \ - + _l->valid.num_bus_check + _l->valid.num_reg_file_check \ - + _l->valid.num_ms_check) * sizeof(sal_log_mod_error_info_t) \ - + sizeof(struct sal_cpuid_info)))); \ -}) - -/* platform error log structures */ - -typedef struct sal_log_mem_dev_err_info { - sal_log_section_hdr_t header; - struct { - u64 error_status : 1, - physical_addr : 1, - addr_mask : 1, - node : 1, - card : 1, - module : 1, - bank : 1, - device : 1, - row : 1, - column : 1, - bit_position : 1, - requestor_id : 1, - responder_id : 1, - target_id : 1, - bus_spec_data : 1, - oem_id : 1, - oem_data : 1, - reserved : 47; - } valid; - u64 error_status; - u64 physical_addr; - u64 addr_mask; - u16 node; - u16 card; - u16 module; - u16 bank; - u16 device; - u16 row; - u16 column; - u16 bit_position; - u64 requestor_id; - u64 responder_id; - u64 target_id; - u64 bus_spec_data; - u8 oem_id[16]; - u8 oem_data[1]; /* Variable length data */ -} sal_log_mem_dev_err_info_t; - -typedef struct sal_log_sel_dev_err_info { - sal_log_section_hdr_t header; - struct { - 
u64 record_id : 1, - record_type : 1, - generator_id : 1, - evm_rev : 1, - sensor_type : 1, - sensor_num : 1, - event_dir : 1, - event_data1 : 1, - event_data2 : 1, - event_data3 : 1, - reserved : 54; - } valid; - u16 record_id; - u8 record_type; - u8 timestamp[4]; - u16 generator_id; - u8 evm_rev; - u8 sensor_type; - u8 sensor_num; - u8 event_dir; - u8 event_data1; - u8 event_data2; - u8 event_data3; -} sal_log_sel_dev_err_info_t; - -typedef struct sal_log_pci_bus_err_info { - sal_log_section_hdr_t header; - struct { - u64 err_status : 1, - err_type : 1, - bus_id : 1, - bus_address : 1, - bus_data : 1, - bus_cmd : 1, - requestor_id : 1, - responder_id : 1, - target_id : 1, - oem_data : 1, - reserved : 54; - } valid; - u64 err_status; - u16 err_type; - u16 bus_id; - u32 reserved; - u64 bus_address; - u64 bus_data; - u64 bus_cmd; - u64 requestor_id; - u64 responder_id; - u64 target_id; - u8 oem_data[1]; /* Variable length data */ -} sal_log_pci_bus_err_info_t; - -typedef struct sal_log_smbios_dev_err_info { - sal_log_section_hdr_t header; - struct { - u64 event_type : 1, - length : 1, - time_stamp : 1, - data : 1, - reserved1 : 60; - } valid; - u8 event_type; - u8 length; - u8 time_stamp[6]; - u8 data[1]; /* data of variable length, length == slsmb_length */ -} sal_log_smbios_dev_err_info_t; - -typedef struct sal_log_pci_comp_err_info { - sal_log_section_hdr_t header; - struct { - u64 err_status : 1, - comp_info : 1, - num_mem_regs : 1, - num_io_regs : 1, - reg_data_pairs : 1, - oem_data : 1, - reserved : 58; - } valid; - u64 err_status; - struct { - u16 vendor_id; - u16 device_id; - u8 class_code[3]; - u8 func_num; - u8 dev_num; - u8 bus_num; - u8 seg_num; - u8 reserved[5]; - } comp_info; - u32 num_mem_regs; - u32 num_io_regs; - u64 reg_data_pairs[1]; - /* - * array of address/data register pairs is num_mem_regs + num_io_regs elements - * long. Each array element consists of a u64 address followed by a u64 data - * value. 
The oem_data array immediately follows the reg_data_pairs array - */ - u8 oem_data[1]; /* Variable length data */ -} sal_log_pci_comp_err_info_t; - -typedef struct sal_log_plat_specific_err_info { - sal_log_section_hdr_t header; - struct { - u64 err_status : 1, - guid : 1, - oem_data : 1, - reserved : 61; - } valid; - u64 err_status; - efi_guid_t guid; - u8 oem_data[1]; /* platform specific variable length data */ -} sal_log_plat_specific_err_info_t; - -typedef struct sal_log_host_ctlr_err_info { - sal_log_section_hdr_t header; - struct { - u64 err_status : 1, - requestor_id : 1, - responder_id : 1, - target_id : 1, - bus_spec_data : 1, - oem_data : 1, - reserved : 58; - } valid; - u64 err_status; - u64 requestor_id; - u64 responder_id; - u64 target_id; - u64 bus_spec_data; - u8 oem_data[1]; /* Variable length OEM data */ -} sal_log_host_ctlr_err_info_t; - -typedef struct sal_log_plat_bus_err_info { - sal_log_section_hdr_t header; - struct { - u64 err_status : 1, - requestor_id : 1, - responder_id : 1, - target_id : 1, - bus_spec_data : 1, - oem_data : 1, - reserved : 58; - } valid; - u64 err_status; - u64 requestor_id; - u64 responder_id; - u64 target_id; - u64 bus_spec_data; - u8 oem_data[1]; /* Variable length OEM data */ -} sal_log_plat_bus_err_info_t; - -/* Overall platform error section structure */ -typedef union sal_log_platform_err_info { - sal_log_mem_dev_err_info_t mem_dev_err; - sal_log_sel_dev_err_info_t sel_dev_err; - sal_log_pci_bus_err_info_t pci_bus_err; - sal_log_smbios_dev_err_info_t smbios_dev_err; - sal_log_pci_comp_err_info_t pci_comp_err; - sal_log_plat_specific_err_info_t plat_specific_err; - sal_log_host_ctlr_err_info_t host_ctlr_err; - sal_log_plat_bus_err_info_t plat_bus_err; -} sal_log_platform_err_info_t; - -/* SAL log over-all, multi-section error record structure (processor+platform) */ -typedef struct err_rec { - sal_log_record_header_t sal_elog_header; - sal_log_processor_info_t proc_err; - sal_log_platform_err_info_t plat_err; - u8 
oem_data_pad[1024]; -} ia64_err_rec_t; - -/* - * Now define a couple of inline functions for improved type checking - * and convenience. - */ - -extern s64 ia64_sal_cache_flush (u64 cache_type); -extern void __init check_sal_cache_flush (void); - -/* Initialize all the processor and platform level instruction and data caches */ -static inline s64 -ia64_sal_cache_init (void) -{ - struct ia64_sal_retval isrv; - SAL_CALL(isrv, SAL_CACHE_INIT, 0, 0, 0, 0, 0, 0, 0); - return isrv.status; -} - -/* - * Clear the processor and platform information logged by SAL with respect to the machine - * state at the time of MCA's, INITs, CMCs, or CPEs. - */ -static inline s64 -ia64_sal_clear_state_info (u64 sal_info_type) -{ - struct ia64_sal_retval isrv; - SAL_CALL_REENTRANT(isrv, SAL_CLEAR_STATE_INFO, sal_info_type, 0, - 0, 0, 0, 0, 0); - return isrv.status; -} - - -/* Get the processor and platform information logged by SAL with respect to the machine - * state at the time of the MCAs, INITs, CMCs, or CPEs. - */ -static inline u64 -ia64_sal_get_state_info (u64 sal_info_type, u64 *sal_info) -{ - struct ia64_sal_retval isrv; - SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO, sal_info_type, 0, - sal_info, 0, 0, 0, 0); - if (isrv.status) - return 0; - - return isrv.v0; -} - -/* - * Get the maximum size of the information logged by SAL with respect to the machine state - * at the time of MCAs, INITs, CMCs, or CPEs. - */ -static inline u64 -ia64_sal_get_state_info_size (u64 sal_info_type) -{ - struct ia64_sal_retval isrv; - SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO_SIZE, sal_info_type, 0, - 0, 0, 0, 0, 0); - if (isrv.status) - return 0; - return isrv.v0; -} - -/* - * Causes the processor to go into a spin loop within SAL where SAL awaits a wakeup from - * the monarch processor. Must not lock, because it will not return on any cpu until the - * monarch processor sends a wake up. 
- */ -static inline s64 -ia64_sal_mc_rendez (void) -{ - struct ia64_sal_retval isrv; - SAL_CALL_NOLOCK(isrv, SAL_MC_RENDEZ, 0, 0, 0, 0, 0, 0, 0); - return isrv.status; -} - -/* - * Allow the OS to specify the interrupt number to be used by SAL to interrupt OS during - * the machine check rendezvous sequence as well as the mechanism to wake up the - * non-monarch processor at the end of machine check processing. - * Returns the complete ia64_sal_retval because some calls return more than just a status - * value. - */ -static inline struct ia64_sal_retval -ia64_sal_mc_set_params (u64 param_type, u64 i_or_m, u64 i_or_m_val, u64 timeout, u64 rz_always) -{ - struct ia64_sal_retval isrv; - SAL_CALL(isrv, SAL_MC_SET_PARAMS, param_type, i_or_m, i_or_m_val, - timeout, rz_always, 0, 0); - return isrv; -} - -/* Read from PCI configuration space */ -static inline s64 -ia64_sal_pci_config_read (u64 pci_config_addr, int type, u64 size, u64 *value) -{ - struct ia64_sal_retval isrv; - SAL_CALL(isrv, SAL_PCI_CONFIG_READ, pci_config_addr, size, type, 0, 0, 0, 0); - if (value) - *value = isrv.v0; - return isrv.status; -} - -/* Write to PCI configuration space */ -static inline s64 -ia64_sal_pci_config_write (u64 pci_config_addr, int type, u64 size, u64 value) -{ - struct ia64_sal_retval isrv; - SAL_CALL(isrv, SAL_PCI_CONFIG_WRITE, pci_config_addr, size, value, - type, 0, 0, 0); - return isrv.status; -} - -/* - * Register physical addresses of locations needed by SAL when SAL procedures are invoked - * in virtual mode. - */ -static inline s64 -ia64_sal_register_physical_addr (u64 phys_entry, u64 phys_addr) -{ - struct ia64_sal_retval isrv; - SAL_CALL(isrv, SAL_REGISTER_PHYSICAL_ADDR, phys_entry, phys_addr, - 0, 0, 0, 0, 0); - return isrv.status; -} - -/* - * Register software dependent code locations within SAL. These locations are handlers or - * entry points where SAL will pass control for the specified event. 
These event handlers - * are for the bott rendezvous, MCAs and INIT scenarios. - */ -static inline s64 -ia64_sal_set_vectors (u64 vector_type, - u64 handler_addr1, u64 gp1, u64 handler_len1, - u64 handler_addr2, u64 gp2, u64 handler_len2) -{ - struct ia64_sal_retval isrv; - SAL_CALL(isrv, SAL_SET_VECTORS, vector_type, - handler_addr1, gp1, handler_len1, - handler_addr2, gp2, handler_len2); - - return isrv.status; -} - -/* Update the contents of PAL block in the non-volatile storage device */ -static inline s64 -ia64_sal_update_pal (u64 param_buf, u64 scratch_buf, u64 scratch_buf_size, - u64 *error_code, u64 *scratch_buf_size_needed) -{ - struct ia64_sal_retval isrv; - SAL_CALL(isrv, SAL_UPDATE_PAL, param_buf, scratch_buf, scratch_buf_size, - 0, 0, 0, 0); - if (error_code) - *error_code = isrv.v0; - if (scratch_buf_size_needed) - *scratch_buf_size_needed = isrv.v1; - return isrv.status; -} - -/* Get physical processor die mapping in the platform. */ -static inline s64 -ia64_sal_physical_id_info(u16 *splid) -{ - struct ia64_sal_retval isrv; - - if (sal_revision < SAL_VERSION_CODE(3,2)) - return -1; - - SAL_CALL(isrv, SAL_PHYSICAL_ID_INFO, 0, 0, 0, 0, 0, 0, 0); - if (splid) - *splid = isrv.v0; - return isrv.status; -} - -extern unsigned long sal_platform_features; - -extern int (*salinfo_platform_oemdata)(const u8 *, u8 **, u64 *); - -struct sal_ret_values { - long r8; long r9; long r10; long r11; -}; - -#define IA64_SAL_OEMFUNC_MIN 0x02000000 -#define IA64_SAL_OEMFUNC_MAX 0x03ffffff - -extern int ia64_sal_oemcall(struct ia64_sal_retval *, u64, u64, u64, u64, u64, - u64, u64, u64); -extern int ia64_sal_oemcall_nolock(struct ia64_sal_retval *, u64, u64, u64, - u64, u64, u64, u64, u64); -extern int ia64_sal_oemcall_reentrant(struct ia64_sal_retval *, u64, u64, u64, - u64, u64, u64, u64, u64); -extern long -ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second, - unsigned long *drift_info); -#ifdef CONFIG_HOTPLUG_CPU -/* - * System Abstraction Layer 
Specification - * Section 3.2.5.1: OS_BOOT_RENDEZ to SAL return State. - * Note: region regs are stored first in head.S _start. Hence they must - * stay up front. - */ -struct sal_to_os_boot { - u64 rr[8]; /* Region Registers */ - u64 br[6]; /* br0: - * return addr into SAL boot rendez routine */ - u64 gr1; /* SAL:GP */ - u64 gr12; /* SAL:SP */ - u64 gr13; /* SAL: Task Pointer */ - u64 fpsr; - u64 pfs; - u64 rnat; - u64 unat; - u64 bspstore; - u64 dcr; /* Default Control Register */ - u64 iva; - u64 pta; - u64 itv; - u64 pmv; - u64 cmcv; - u64 lrr[2]; - u64 gr[4]; - u64 pr; /* Predicate registers */ - u64 lc; /* Loop Count */ - struct ia64_fpreg fp[20]; -}; - -/* - * Global array allocated for NR_CPUS at boot time - */ -extern struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS]; - -extern void ia64_jump_to_sal(struct sal_to_os_boot *); -#endif - -extern void ia64_sal_handler_init(void *entry_point, void *gpval); - -#define PALO_MAX_TLB_PURGES 0xFFFF -#define PALO_SIG "PALO" - -struct palo_table { - u8 signature[4]; /* Should be "PALO" */ - u32 length; - u8 minor_revision; - u8 major_revision; - u8 checksum; - u8 reserved1[5]; - u16 max_tlb_purges; - u8 reserved2[6]; -}; - -#define NPTCG_FROM_PAL 0 -#define NPTCG_FROM_PALO 1 -#define NPTCG_FROM_KERNEL_PARAMETER 2 - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_IA64_SAL_H */ diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h deleted file mode 100644 index 8e0875cf6071..000000000000 --- a/arch/ia64/include/asm/sections.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_SECTIONS_H -#define _ASM_IA64_SECTIONS_H - -/* - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include -#include - -typedef struct fdesc func_desc_t; - -#include - -extern char __phys_per_cpu_start[]; -#ifdef CONFIG_SMP -extern char __cpu0_per_cpu[]; -#endif -extern char __start___vtop_patchlist[], __end___vtop_patchlist[]; -extern char 
__start___rse_patchlist[], __end___rse_patchlist[]; -extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[]; -extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[]; -extern char __start_gate_section[]; -extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[]; -extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[]; -extern char __start_gate_fsyscall_patchlist[], __end_gate_fsyscall_patchlist[]; -extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_bubble_down_patchlist[]; -extern char __start_unwind[], __end_unwind[]; -extern char __start_ivt_text[], __end_ivt_text[]; - -#endif /* _ASM_IA64_SECTIONS_H */ diff --git a/arch/ia64/include/asm/serial.h b/arch/ia64/include/asm/serial.h deleted file mode 100644 index 068be11583df..000000000000 --- a/arch/ia64/include/asm/serial.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Derived from the i386 version. - */ - -/* - * This assumes you have a 1.8432 MHz clock for your UART. - * - * It'd be nice if someone built a serial card with a 24.576 MHz - * clock, since the 16550A is capable of handling a top speed of 1.5 - * megabits/second; but this requires the faster clock. - */ -#define BASE_BAUD ( 1843200 / 16 ) - -/* - * All legacy serial ports should be enumerated via ACPI namespace, so - * we need not list them here. - */ diff --git a/arch/ia64/include/asm/shmparam.h b/arch/ia64/include/asm/shmparam.h deleted file mode 100644 index 43bd8324ab71..000000000000 --- a/arch/ia64/include/asm/shmparam.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_SHMPARAM_H -#define _ASM_IA64_SHMPARAM_H - -/* - * SHMLBA controls minimum alignment at which shared memory segments - * get attached. The IA-64 architecture says that there may be a - * performance degradation when there are virtual aliases within 1MB. - * To reduce the chance of this, we set SHMLBA to 1MB. 
--davidm 00/12/20 - */ -#define SHMLBA (1024*1024) - -#endif /* _ASM_IA64_SHMPARAM_H */ diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h deleted file mode 100644 index 80f067f9b3ce..000000000000 --- a/arch/ia64/include/asm/signal.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Modified 1998-2001, 2003 - * David Mosberger-Tang , Hewlett-Packard Co - * - * Unfortunately, this file is being included by bits/signal.h in - * glibc-2.x. Hence the #ifdef __KERNEL__ ugliness. - */ -#ifndef _ASM_IA64_SIGNAL_H -#define _ASM_IA64_SIGNAL_H - -#include - - -#define _NSIG 64 -#define _NSIG_BPW 64 -#define _NSIG_WORDS (_NSIG / _NSIG_BPW) - -# ifndef __ASSEMBLY__ - -/* Most things should be clean enough to redefine this at will, if care - is taken to make libc match. */ - -typedef unsigned long old_sigset_t; - -typedef struct { - unsigned long sig[_NSIG_WORDS]; -} sigset_t; - -# include - -# endif /* !__ASSEMBLY__ */ -#endif /* _ASM_IA64_SIGNAL_H */ diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h deleted file mode 100644 index aa92234c0142..000000000000 --- a/arch/ia64/include/asm/smp.h +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * SMP Support - * - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P. 
- * David Mosberger-Tang - * Bjorn Helgaas - */ -#ifndef _ASM_IA64_SMP_H -#define _ASM_IA64_SMP_H - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static inline unsigned int -ia64_get_lid (void) -{ - union { - struct { - unsigned long reserved : 16; - unsigned long eid : 8; - unsigned long id : 8; - unsigned long ignored : 32; - } f; - unsigned long bits; - } lid; - - lid.bits = ia64_getreg(_IA64_REG_CR_LID); - return lid.f.id << 8 | lid.f.eid; -} - -#define hard_smp_processor_id() ia64_get_lid() - -#ifdef CONFIG_SMP - -#define raw_smp_processor_id() (current_thread_info()->cpu) - -extern struct smp_boot_data { - int cpu_count; - int cpu_phys_id[NR_CPUS]; -} smp_boot_data __initdata; - -extern char no_int_routing; - -extern cpumask_t cpu_core_map[NR_CPUS]; -DECLARE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); -extern int smp_num_siblings; -extern void __iomem *ipi_base_addr; - -extern volatile int ia64_cpu_to_sapicid[]; -#define cpu_physical_id(i) ia64_cpu_to_sapicid[i] - -extern unsigned long ap_wakeup_vector; - -/* - * Function to map hard smp processor id to logical id. Slow, so don't use this in - * performance-critical code. 
- */ -static inline int -cpu_logical_id (int cpuid) -{ - int i; - - for (i = 0; i < NR_CPUS; ++i) - if (cpu_physical_id(i) == cpuid) - break; - return i; -} - -/* Upping and downing of CPUs */ -extern int __cpu_disable (void); -extern void __cpu_die (unsigned int cpu); -extern void cpu_die (void) __attribute__ ((noreturn)); -extern void __init smp_build_cpu_map(void); - -extern void __init init_smp_config (void); -extern void smp_do_timer (struct pt_regs *regs); - -extern irqreturn_t handle_IPI(int irq, void *dev_id); -extern void smp_send_reschedule (int cpu); -extern void identify_siblings (struct cpuinfo_ia64 *); -extern int is_multithreading_enabled(void); - -extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); - -#else /* CONFIG_SMP */ - -#define cpu_logical_id(i) 0 -#define cpu_physical_id(i) ia64_get_lid() - -#endif /* CONFIG_SMP */ -#endif /* _ASM_IA64_SMP_H */ diff --git a/arch/ia64/include/asm/sn/intr.h b/arch/ia64/include/asm/sn/intr.h deleted file mode 100644 index 3885a77b21df..000000000000 --- a/arch/ia64/include/asm/sn/intr.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved. 
- */ - -#ifndef _ASM_IA64_SN_INTR_H -#define _ASM_IA64_SN_INTR_H - -#define SGI_XPC_ACTIVATE 0x30 -#define SGI_XPC_NOTIFY 0xe7 - -#endif /* _ASM_IA64_SN_INTR_H */ diff --git a/arch/ia64/include/asm/sn/sn_sal.h b/arch/ia64/include/asm/sn/sn_sal.h deleted file mode 100644 index d437aa43343b..000000000000 --- a/arch/ia64/include/asm/sn/sn_sal.h +++ /dev/null @@ -1,124 +0,0 @@ -#ifndef _ASM_IA64_SN_SN_SAL_H -#define _ASM_IA64_SN_SN_SAL_H - -/* - * System Abstraction Layer definitions for IA64 - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (c) 2000-2006 Silicon Graphics, Inc. All rights reserved. - */ - -#include -#include - -// SGI Specific Calls -#define SN_SAL_GET_PARTITION_ADDR 0x02000009 -#define SN_SAL_MEMPROTECT 0x0200003e - -#define SN_SAL_WATCHLIST_ALLOC 0x02000070 -#define SN_SAL_WATCHLIST_FREE 0x02000071 - -/* - * SAL Error Codes - */ -#define SALRET_MORE_PASSES 1 -#define SALRET_OK 0 -#define SALRET_NOT_IMPLEMENTED (-1) -#define SALRET_INVALID_ARG (-2) -#define SALRET_ERROR (-3) - -/* - * Returns the physical address of the partition's reserved page through - * an iterative number of calls. - * - * On first call, 'cookie' and 'len' should be set to 0, and 'addr' - * set to the nasid of the partition whose reserved page's address is - * being sought. - * On subsequent calls, pass the values, that were passed back on the - * previous call. - * - * While the return status equals SALRET_MORE_PASSES, keep calling - * this function after first copying 'len' bytes starting at 'addr' - * into 'buf'. Once the return status equals SALRET_OK, 'addr' will - * be the physical address of the partition's reserved page. If the - * return status equals neither of these, an error as occurred. 
- */ -static inline s64 -sn_partition_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) -{ - struct ia64_sal_retval rv; - ia64_sal_oemcall_reentrant(&rv, SN_SAL_GET_PARTITION_ADDR, *cookie, - *addr, buf, *len, 0, 0, 0); - *cookie = rv.v0; - *addr = rv.v1; - *len = rv.v2; - return rv.status; -} - -/* - * Change memory access protections for a physical address range. - * nasid_array is not used on Altix, but may be in future architectures. - * Available memory protection access classes are defined after the function. - */ -static inline int -sn_change_memprotect(u64 paddr, u64 len, u64 perms, u64 *nasid_array) -{ - struct ia64_sal_retval ret_stuff; - - ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_MEMPROTECT, paddr, len, - (u64)nasid_array, perms, 0, 0, 0); - return ret_stuff.status; -} -#define SN_MEMPROT_ACCESS_CLASS_0 0x14a080 -#define SN_MEMPROT_ACCESS_CLASS_1 0x2520c2 -#define SN_MEMPROT_ACCESS_CLASS_2 0x14a1ca -#define SN_MEMPROT_ACCESS_CLASS_3 0x14a290 -#define SN_MEMPROT_ACCESS_CLASS_6 0x084080 -#define SN_MEMPROT_ACCESS_CLASS_7 0x021080 - -union sn_watchlist_u { - u64 val; - struct { - u64 blade : 16, - size : 32, - filler : 16; - }; -}; - -static inline int -sn_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size, - unsigned long *intr_mmr_offset) -{ - struct ia64_sal_retval rv; - unsigned long addr; - union sn_watchlist_u size_blade; - int watchlist; - - addr = (unsigned long)mq; - size_blade.size = mq_size; - size_blade.blade = blade; - - /* - * bios returns watchlist number or negative error number. 
- */ - ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_ALLOC, addr, - size_blade.val, (u64)intr_mmr_offset, - (u64)&watchlist, 0, 0, 0); - if (rv.status < 0) - return rv.status; - - return watchlist; -} - -static inline int -sn_mq_watchlist_free(int blade, int watchlist_num) -{ - struct ia64_sal_retval rv; - ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_FREE, blade, - watchlist_num, 0, 0, 0, 0, 0); - return rv.status; -} -#endif /* _ASM_IA64_SN_SN_SAL_H */ diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h deleted file mode 100644 index a58f8b466d96..000000000000 --- a/arch/ia64/include/asm/sparsemem.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_SPARSEMEM_H -#define _ASM_IA64_SPARSEMEM_H - -#ifdef CONFIG_SPARSEMEM -#include -/* - * SECTION_SIZE_BITS 2^N: how big each section will be - * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space - */ - -#define SECTION_SIZE_BITS (30) -#define MAX_PHYSMEM_BITS (50) -#ifdef CONFIG_ARCH_FORCE_MAX_ORDER -#if (CONFIG_ARCH_FORCE_MAX_ORDER + PAGE_SHIFT > SECTION_SIZE_BITS) -#undef SECTION_SIZE_BITS -#define SECTION_SIZE_BITS (CONFIG_ARCH_FORCE_MAX_ORDER + PAGE_SHIFT) -#endif -#endif - -#endif /* CONFIG_SPARSEMEM */ - -#ifdef CONFIG_MEMORY_HOTPLUG -int memory_add_physaddr_to_nid(u64 addr); -#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid -#endif - -#endif /* _ASM_IA64_SPARSEMEM_H */ diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h deleted file mode 100644 index 0e5c1ad3239c..000000000000 --- a/arch/ia64/include/asm/spinlock.h +++ /dev/null @@ -1,265 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_SPINLOCK_H -#define _ASM_IA64_SPINLOCK_H - -/* - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999 Walt Drummond - * - * This file is used for SMP configurations only. 
- */ - -#include -#include -#include - -#include -#include -#include -#include - -#define arch_spin_lock_init(x) ((x)->lock = 0) - -/* - * Ticket locks are conceptually two parts, one indicating the current head of - * the queue, and the other indicating the current tail. The lock is acquired - * by atomically noting the tail and incrementing it by one (thus adding - * ourself to the queue and noting our position), then waiting until the head - * becomes equal to the initial value of the tail. - * The pad bits in the middle are used to prevent the next_ticket number - * overflowing into the now_serving number. - * - * 31 17 16 15 14 0 - * +----------------------------------------------------+ - * | now_serving | padding | next_ticket | - * +----------------------------------------------------+ - */ - -#define TICKET_SHIFT 17 -#define TICKET_BITS 15 -#define TICKET_MASK ((1 << TICKET_BITS) - 1) - -static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) -{ - int *p = (int *)&lock->lock, ticket, serve; - - ticket = ia64_fetchadd(1, p, acq); - - if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) - return; - - ia64_invala(); - - for (;;) { - asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory"); - - if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) - return; - cpu_relax(); - } -} - -static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) -{ - int tmp = READ_ONCE(lock->lock); - - if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK)) - return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp; - return 0; -} - -static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) -{ - unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; - - /* This could be optimised with ARCH_HAS_MMIOWB */ - mmiowb(); - asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); - WRITE_ONCE(*p, (tmp + 2) & ~1); -} - -static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) -{ - long tmp = 
READ_ONCE(lock->lock); - - return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK); -} - -static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) -{ - long tmp = READ_ONCE(lock->lock); - - return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; -} - -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - return !(((lock.lock >> TICKET_SHIFT) ^ lock.lock) & TICKET_MASK); -} - -static inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - return __ticket_spin_is_locked(lock); -} - -static inline int arch_spin_is_contended(arch_spinlock_t *lock) -{ - return __ticket_spin_is_contended(lock); -} -#define arch_spin_is_contended arch_spin_is_contended - -static __always_inline void arch_spin_lock(arch_spinlock_t *lock) -{ - __ticket_spin_lock(lock); -} - -static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - return __ticket_spin_trylock(lock); -} - -static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - __ticket_spin_unlock(lock); -} - -#ifdef ASM_SUPPORTED - -static __always_inline void -arch_read_lock(arch_rwlock_t *lock) -{ - unsigned long flags = 0; - - __asm__ __volatile__ ( - "tbit.nz p6, p0 = %1,%2\n" - "br.few 3f\n" - "1:\n" - "fetchadd4.rel r2 = [%0], -1;;\n" - "(p6) ssm psr.i\n" - "2:\n" - "hint @pause\n" - "ld4 r2 = [%0];;\n" - "cmp4.lt p7,p0 = r2, r0\n" - "(p7) br.cond.spnt.few 2b\n" - "(p6) rsm psr.i\n" - ";;\n" - "3:\n" - "fetchadd4.acq r2 = [%0], 1;;\n" - "cmp4.lt p7,p0 = r2, r0\n" - "(p7) br.cond.spnt.few 1b\n" - : : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT) - : "p6", "p7", "r2", "memory"); -} - -#else /* !ASM_SUPPORTED */ - -#define arch_read_lock(rw) \ -do { \ - arch_rwlock_t *__read_lock_ptr = (rw); \ - \ - while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) { \ - ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ - while (*(volatile int *)__read_lock_ptr < 0) \ - cpu_relax(); \ - } \ -} while (0) - -#endif /* !ASM_SUPPORTED */ - -#define arch_read_unlock(rw) 
\ -do { \ - arch_rwlock_t *__read_lock_ptr = (rw); \ - ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ -} while (0) - -#ifdef ASM_SUPPORTED - -static __always_inline void -arch_write_lock(arch_rwlock_t *lock) -{ - unsigned long flags = 0; - - __asm__ __volatile__ ( - "tbit.nz p6, p0 = %1, %2\n" - "mov ar.ccv = r0\n" - "dep r29 = -1, r0, 31, 1\n" - "br.few 3f;;\n" - "1:\n" - "(p6) ssm psr.i\n" - "2:\n" - "hint @pause\n" - "ld4 r2 = [%0];;\n" - "cmp4.eq p0,p7 = r0, r2\n" - "(p7) br.cond.spnt.few 2b\n" - "(p6) rsm psr.i\n" - ";;\n" - "3:\n" - "cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n" - "cmp4.eq p0,p7 = r0, r2\n" - "(p7) br.cond.spnt.few 1b;;\n" - : : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT) - : "ar.ccv", "p6", "p7", "r2", "r29", "memory"); -} - -#define arch_write_trylock(rw) \ -({ \ - register long result; \ - \ - __asm__ __volatile__ ( \ - "mov ar.ccv = r0\n" \ - "dep r29 = -1, r0, 31, 1;;\n" \ - "cmpxchg4.acq %0 = [%1], r29, ar.ccv\n" \ - : "=r"(result) : "r"(rw) : "ar.ccv", "r29", "memory"); \ - (result == 0); \ -}) - -static inline void arch_write_unlock(arch_rwlock_t *x) -{ - u8 *y = (u8 *)x; - barrier(); - asm volatile ("st1.rel.nta [%0] = r0\n\t" :: "r"(y+3) : "memory" ); -} - -#else /* !ASM_SUPPORTED */ - -#define arch_write_lock(l) \ -({ \ - __u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1); \ - __u32 *ia64_write_lock_ptr = (__u32 *) (l); \ - do { \ - while (*ia64_write_lock_ptr) \ - ia64_barrier(); \ - ia64_val = ia64_cmpxchg4_acq(ia64_write_lock_ptr, ia64_set_val, 0); \ - } while (ia64_val); \ -}) - -#define arch_write_trylock(rw) \ -({ \ - __u64 ia64_val; \ - __u64 ia64_set_val = ia64_dep_mi(-1, 0, 31,1); \ - ia64_val = ia64_cmpxchg4_acq((__u32 *)(rw), ia64_set_val, 0); \ - (ia64_val == 0); \ -}) - -static inline void arch_write_unlock(arch_rwlock_t *x) -{ - barrier(); - x->write_lock = 0; -} - -#endif /* !ASM_SUPPORTED */ - -static inline int arch_read_trylock(arch_rwlock_t *x) -{ - union { - arch_rwlock_t lock; - __u32 word; - } old, new; - 
old.lock = new.lock = *x; - old.lock.write_lock = new.lock.write_lock = 0; - ++new.lock.read_counter; - return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word; -} - -#endif /* _ASM_IA64_SPINLOCK_H */ diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h deleted file mode 100644 index 14b8a161c165..000000000000 --- a/arch/ia64/include/asm/spinlock_types.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_SPINLOCK_TYPES_H -#define _ASM_IA64_SPINLOCK_TYPES_H - -#ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" -#endif - -typedef struct { - volatile unsigned int lock; -} arch_spinlock_t; - -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } - -typedef struct { - volatile unsigned int read_counter : 31; - volatile unsigned int write_lock : 1; -} arch_rwlock_t; - -#define __ARCH_RW_LOCK_UNLOCKED { 0, 0 } - -#endif diff --git a/arch/ia64/include/asm/string.h b/arch/ia64/include/asm/string.h deleted file mode 100644 index 8b84df0dbfad..000000000000 --- a/arch/ia64/include/asm/string.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_STRING_H -#define _ASM_IA64_STRING_H - -/* - * Here is where we want to put optimized versions of the string - * routines. 
- * - * Copyright (C) 1998-2000, 2002 Hewlett-Packard Co - * David Mosberger-Tang - */ - - -#define __HAVE_ARCH_STRLEN 1 /* see arch/ia64/lib/strlen.S */ -#define __HAVE_ARCH_MEMSET 1 /* see arch/ia64/lib/memset.S */ -#define __HAVE_ARCH_MEMCPY 1 /* see arch/ia64/lib/memcpy.S */ - -extern __kernel_size_t strlen (const char *); -extern void *memcpy (void *, const void *, __kernel_size_t); -extern void *memset (void *, int, __kernel_size_t); - -#endif /* _ASM_IA64_STRING_H */ diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h deleted file mode 100644 index a5a4e09468fa..000000000000 --- a/arch/ia64/include/asm/switch_to.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Low-level task switching. This is based on information published in - * the Processor Abstraction Layer and the System Abstraction Layer - * manual. - * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - */ -#ifndef _ASM_IA64_SWITCH_TO_H -#define _ASM_IA64_SWITCH_TO_H - -#include - -struct task_struct; - -/* - * Context switch from one thread to another. If the two threads have - * different address spaces, schedule() has already taken care of - * switching to the new address space by calling switch_mm(). - * - * Disabling access to the fph partition and the debug-register - * context switch MUST be done before calling ia64_switch_to() since a - * newly created thread returns directly to - * ia64_ret_from_syscall_clear_r8. 
- */ -extern struct task_struct *ia64_switch_to (void *next_task); - -extern void ia64_save_extra (struct task_struct *task); -extern void ia64_load_extra (struct task_struct *task); - -#define IA64_HAS_EXTRA_STATE(t) \ - ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)) - -#define __switch_to(prev,next,last) do { \ - if (IA64_HAS_EXTRA_STATE(prev)) \ - ia64_save_extra(prev); \ - if (IA64_HAS_EXTRA_STATE(next)) \ - ia64_load_extra(next); \ - ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \ - (last) = ia64_switch_to((next)); \ -} while (0) - -#ifdef CONFIG_SMP -/* - * In the SMP case, we save the fph state when context-switching away from a thread that - * modified fph. This way, when the thread gets scheduled on another CPU, the CPU can - * pick up the state from task->thread.fph, avoiding the complication of having to fetch - * the latest fph state from another CPU. In other words: eager save, lazy restore. - */ -# define switch_to(prev,next,last) do { \ - if (ia64_psr(task_pt_regs(prev))->mfh && ia64_is_local_fpu_owner(prev)) { \ - ia64_psr(task_pt_regs(prev))->mfh = 0; \ - (prev)->thread.flags |= IA64_THREAD_FPH_VALID; \ - __ia64_save_fpu((prev)->thread.fph); \ - } \ - __switch_to(prev, next, last); \ - /* "next" in old context is "current" in new context */ \ - if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) && \ - (task_cpu(current) != \ - task_thread_info(current)->last_cpu))) { \ - task_thread_info(current)->last_cpu = task_cpu(current); \ - } \ -} while (0) -#else -# define switch_to(prev,next,last) __switch_to(prev, next, last) -#endif - -#endif /* _ASM_IA64_SWITCH_TO_H */ diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h deleted file mode 100644 index 2b02a3fb862a..000000000000 --- a/arch/ia64/include/asm/syscall.h +++ /dev/null @@ -1,65 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Access to user system call parameters and results - * - * Copyright (C) 2008 
Intel Corp. Shaohua Li - * - * See asm-generic/syscall.h for descriptions of what we must do here. - */ - -#ifndef _ASM_SYSCALL_H -#define _ASM_SYSCALL_H 1 - -#include -#include -#include - -static inline long syscall_get_nr(struct task_struct *task, - struct pt_regs *regs) -{ - if ((long)regs->cr_ifs < 0) /* Not a syscall */ - return -1; - - return regs->r15; -} - -static inline void syscall_rollback(struct task_struct *task, - struct pt_regs *regs) -{ - /* do nothing */ -} - -static inline long syscall_get_error(struct task_struct *task, - struct pt_regs *regs) -{ - return regs->r10 == -1 ? -regs->r8:0; -} - -static inline long syscall_get_return_value(struct task_struct *task, - struct pt_regs *regs) -{ - return regs->r8; -} - -static inline void syscall_set_return_value(struct task_struct *task, - struct pt_regs *regs, - int error, long val) -{ - if (error) { - /* error < 0, but ia64 uses > 0 return value */ - regs->r8 = -error; - regs->r10 = -1; - } else { - regs->r8 = val; - regs->r10 = 0; - } -} - -extern void syscall_get_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned long *args); - -static inline int syscall_get_arch(struct task_struct *task) -{ - return AUDIT_ARCH_IA64; -} -#endif /* _ASM_SYSCALL_H */ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h deleted file mode 100644 index 21b257117e0a..000000000000 --- a/arch/ia64/include/asm/thread_info.h +++ /dev/null @@ -1,131 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ -#ifndef _ASM_IA64_THREAD_INFO_H -#define _ASM_IA64_THREAD_INFO_H - -#ifndef ASM_OFFSETS_C -#include -#endif -#include -#include - -#define THREAD_SIZE KERNEL_STACK_SIZE - -#ifndef __ASSEMBLY__ - -/* - * On IA-64, we want to keep the task structure and kernel stack together, so they can be - * mapped by a single TLB entry and so they can be addressed by the "current" pointer - * without having to do 
pointer masking. - */ -struct thread_info { - struct task_struct *task; /* XXX not really needed, except for dup_task_struct() */ - __u32 flags; /* thread_info flags (see TIF_*) */ - __u32 cpu; /* current CPU */ - __u32 last_cpu; /* Last CPU thread ran on */ - __u32 status; /* Thread synchronous flags */ - int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - __u64 utime; - __u64 stime; - __u64 gtime; - __u64 hardirq_time; - __u64 softirq_time; - __u64 idle_time; - __u64 ac_stamp; - __u64 ac_leave; - __u64 ac_stime; - __u64 ac_utime; -#endif -}; - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = INIT_PREEMPT_COUNT, \ -} - -#ifndef ASM_OFFSETS_C -/* how to get the thread information struct from C */ -#define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define arch_alloc_thread_stack_node(tsk, node) \ - ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) -#define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) -#else -#define current_thread_info() ((struct thread_info *) 0) -#define arch_alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) -#define task_thread_info(tsk) ((struct thread_info *) 0) -#endif -#define arch_free_thread_stack(tsk) /* nothing */ -#define task_stack_page(tsk) ((void *)(tsk)) - -#define __HAVE_THREAD_FUNCTIONS -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#define setup_thread_stack(p, org) \ - *task_thread_info(p) = *task_thread_info(org); \ - task_thread_info(p)->ac_stime = 0; \ - task_thread_info(p)->ac_utime = 0; \ - task_thread_info(p)->task = (p); -#else -#define setup_thread_stack(p, org) \ - *task_thread_info(p) = *task_thread_info(org); \ - task_thread_info(p)->task = (p); -#endif -#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET) - -#define alloc_task_struct_node(node) \ -({ \ - struct page *page = 
alloc_pages_node(node, GFP_KERNEL | __GFP_COMP, \ - KERNEL_STACK_SIZE_ORDER); \ - struct task_struct *ret = page ? page_address(page) : NULL; \ - \ - ret; \ -}) -#define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) - -#endif /* !__ASSEMBLY */ - -/* - * thread information flags - * - these are process state flags that various assembly files may need to access - * - pending work-to-be-done flags are in least-significant 16 bits, other flags - * in top 16 bits - */ -#define TIF_SIGPENDING 0 /* signal pending */ -#define TIF_NEED_RESCHED 1 /* rescheduling necessary */ -#define TIF_SYSCALL_TRACE 2 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ -#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ -#define TIF_NOTIFY_SIGNAL 5 /* signal notification exist */ -#define TIF_NOTIFY_RESUME 6 /* resumption notification requested */ -#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ -#define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ -#define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */ -#define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */ -#define TIF_POLLING_NRFLAG 22 /* idle is polling for TIF_NEED_RESCHED */ - -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP) -#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) -#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_MCA_INIT (1 << TIF_MCA_INIT) -#define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) -#define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE) -#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) - -/* "work to do on user-return" bits */ -#define TIF_ALLWORK_MASK 
(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ - _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_NOTIFY_SIGNAL) -/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ -#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) - -#endif /* _ASM_IA64_THREAD_INFO_H */ diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h deleted file mode 100644 index 7ccc077a60be..000000000000 --- a/arch/ia64/include/asm/timex.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_TIMEX_H -#define _ASM_IA64_TIMEX_H - -/* - * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co - * David Mosberger-Tang - */ -/* - * 2001/01/18 davidm Removed CLOCK_TICK_RATE. It makes no sense on IA-64. - * Also removed cacheflush_time as it's entirely unused. - */ - -#include -#include - -typedef unsigned long cycles_t; - -extern void (*ia64_udelay)(unsigned long usecs); - -/* - * For performance reasons, we don't want to define CLOCK_TICK_TRATE as - * local_cpu_data->itc_rate. Fortunately, we don't have to, either: according to George - * Anzinger, 1/CLOCK_TICK_RATE is taken as the resolution of the timer clock. The time - * calculation assumes that you will use enough of these so that your tick size <= 1/HZ. - * If the calculation shows that your CLOCK_TICK_RATE can not supply exactly 1/HZ ticks, - * the actual value is calculated and used to update the wall clock each jiffie. Setting - * the CLOCK_TICK_RATE to x*HZ insures that the calculation will find no errors. Hence we - * pick a multiple of HZ which gives us a (totally virtual) CLOCK_TICK_RATE of about - * 100MHz. 
- */ -#define CLOCK_TICK_RATE (HZ * 100000UL) - -static inline cycles_t -get_cycles (void) -{ - cycles_t ret; - - ret = ia64_getreg(_IA64_REG_AR_ITC); - return ret; -} -#define get_cycles get_cycles - -extern void ia64_cpu_local_tick (void); -extern unsigned long long ia64_native_sched_clock (void); - -#endif /* _ASM_IA64_TIMEX_H */ diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h deleted file mode 100644 index a15fe0809aae..000000000000 --- a/arch/ia64/include/asm/tlb.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_TLB_H -#define _ASM_IA64_TLB_H -/* - * Based on . - * - * Copyright (C) 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ -/* - * Removing a translation from a page table (including TLB-shootdown) is a four-step - * procedure: - * - * (1) Flush (virtual) caches --- ensures virtual memory is coherent with kernel memory - * (this is a no-op on ia64). - * (2) Clear the relevant portions of the page-table - * (3) Flush the TLBs --- ensures that stale content is gone from CPU TLBs - * (4) Release the pages that were freed up in step (2). - * - * Note that the ordering of these steps is crucial to avoid races on MP machines. - * - * The Linux kernel defines several platform-specific hooks for TLB-shootdown. 
When - * unmapping a portion of the virtual address space, these hooks are called according to - * the following template: - * - * tlb <- tlb_gather_mmu(mm); // start unmap for address space MM - * { - * for each vma that needs a shootdown do { - * tlb_start_vma(tlb, vma); - * for each page-table-entry PTE that needs to be removed do { - * tlb_remove_tlb_entry(tlb, pte, address); - * if (pte refers to a normal page) { - * tlb_remove_page(tlb, page); - * } - * } - * tlb_end_vma(tlb, vma); - * } - * } - * tlb_finish_mmu(tlb); // finish unmap for address space MM - */ -#include -#include -#include - -#include -#include - -#include - -#endif /* _ASM_IA64_TLB_H */ diff --git a/arch/ia64/include/asm/tlbflush.h b/arch/ia64/include/asm/tlbflush.h deleted file mode 100644 index ceac10c4d6e2..000000000000 --- a/arch/ia64/include/asm/tlbflush.h +++ /dev/null @@ -1,128 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_TLBFLUSH_H -#define _ASM_IA64_TLBFLUSH_H - -/* - * Copyright (C) 2002 Hewlett-Packard Co - * David Mosberger-Tang - */ - - -#include - -#include -#include -#include - -struct ia64_tr_entry { - u64 ifa; - u64 itir; - u64 pte; - u64 rr; -}; /*Record for tr entry!*/ - -extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); -extern void ia64_ptr_entry(u64 target_mask, int slot); -extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; - -/* - region register macros -*/ -#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001) -#define RR_VE(val) (((val) & 0x0000000000000001) << 0) -#define RR_VE_MASK 0x0000000000000001L -#define RR_VE_SHIFT 0 -#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f) -#define RR_PS(val) (((val) & 0x000000000000003f) << 2) -#define RR_PS_MASK 0x00000000000000fcL -#define RR_PS_SHIFT 2 -#define RR_RID_MASK 0x00000000ffffff00L -#define RR_TO_RID(val) ((val >> 8) & 0xffffff) - -/* - * Now for some TLB flushing routines. 
This is the kind of stuff that - * can be very expensive, so try to avoid them whenever possible. - */ -extern void setup_ptcg_sem(int max_purges, int from_palo); - -/* - * Flush everything (kernel mapping may also have changed due to - * vmalloc/vfree). - */ -extern void local_flush_tlb_all (void); - -#ifdef CONFIG_SMP - extern void smp_flush_tlb_all (void); - extern void smp_flush_tlb_mm (struct mm_struct *mm); - extern void smp_flush_tlb_cpumask (cpumask_t xcpumask); -# define flush_tlb_all() smp_flush_tlb_all() -#else -# define flush_tlb_all() local_flush_tlb_all() -# define smp_flush_tlb_cpumask(m) local_flush_tlb_all() -#endif - -static inline void -local_finish_flush_tlb_mm (struct mm_struct *mm) -{ - if (mm == current->active_mm) - activate_context(mm); -} - -/* - * Flush a specified user mapping. This is called, e.g., as a result of fork() and - * exit(). fork() ends up here because the copy-on-write mechanism needs to write-protect - * the PTEs of the parent task. - */ -static inline void -flush_tlb_mm (struct mm_struct *mm) -{ - if (!mm) - return; - - set_bit(mm->context, ia64_ctx.flushmap); - mm->context = 0; - - if (atomic_read(&mm->mm_users) == 0) - return; /* happens as a result of exit_mmap() */ - -#ifdef CONFIG_SMP - smp_flush_tlb_mm(mm); -#else - local_finish_flush_tlb_mm(mm); -#endif -} - -extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end); - -/* - * Page-granular tlb flush. - */ -static inline void -flush_tlb_page (struct vm_area_struct *vma, unsigned long addr) -{ -#ifdef CONFIG_SMP - flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + PAGE_SIZE); -#else - if (vma->vm_mm == current->active_mm) - ia64_ptcl(addr, (PAGE_SHIFT << 2)); - else - vma->vm_mm->context = 0; -#endif -} - -/* - * Flush the local TLB. Invoked from another cpu using an IPI. 
- */ -#ifdef CONFIG_SMP -void smp_local_flush_tlb(void); -#else -#define smp_local_flush_tlb() -#endif - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); /* XXX fix me */ -} - -#endif /* _ASM_IA64_TLBFLUSH_H */ diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h deleted file mode 100644 index 43567240b0d6..000000000000 --- a/arch/ia64/include/asm/topology.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2002, Erich Focht, NEC - * - * All rights reserved. - */ -#ifndef _ASM_IA64_TOPOLOGY_H -#define _ASM_IA64_TOPOLOGY_H - -#include -#include -#include - -#ifdef CONFIG_NUMA - -/* Nodes w/o CPUs are preferred for memory allocations, see build_zonelists */ -#define PENALTY_FOR_NODE_WITH_CPUS 255 - -/* - * Nodes within this distance are eligible for reclaim by zone_reclaim() when - * zone_reclaim_mode is enabled. - */ -#define RECLAIM_DISTANCE 15 - -/* - * Returns a bitmask of CPUs on Node 'node'. - */ -#define cpumask_of_node(node) ((node) == -1 ? \ - cpu_all_mask : \ - &node_to_cpu_mask[node]) - -/* - * Determines the node for a given pci bus - */ -#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node - -void build_cpu_to_node_map(void); - -#endif /* CONFIG_NUMA */ - -#ifdef CONFIG_SMP -#define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id) -#define topology_core_id(cpu) (cpu_data(cpu)->core_id) -#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) -#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) -#endif - -extern void arch_fix_phys_package_id(int num, u32 slot); - -#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? 
\ - cpu_all_mask : \ - cpumask_of_node(pcibus_to_node(bus))) - -#include - -#endif /* _ASM_IA64_TOPOLOGY_H */ diff --git a/arch/ia64/include/asm/types.h b/arch/ia64/include/asm/types.h deleted file mode 100644 index 5ddc7703de99..000000000000 --- a/arch/ia64/include/asm/types.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file is never included by application software unless explicitly - * requested (e.g., via linux/types.h) in which case the application is - * Linux specific so (user-) name space pollution is not a major issue. - * However, for interoperability, libraries still need to be careful to - * avoid naming clashes. - * - * Based on . - * - * Modified 1998-2000, 2002 - * David Mosberger-Tang , Hewlett-Packard Co - */ -#ifndef _ASM_IA64_TYPES_H -#define _ASM_IA64_TYPES_H - -#include -#include - -#ifdef __ASSEMBLY__ -#else -/* - * These aren't exported outside the kernel to avoid name space clashes - */ - -struct fnptr { - unsigned long ip; - unsigned long gp; -}; - -#endif /* !__ASSEMBLY__ */ -#endif /* _ASM_IA64_TYPES_H */ diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h deleted file mode 100644 index 60adadeb3e9e..000000000000 --- a/arch/ia64/include/asm/uaccess.h +++ /dev/null @@ -1,265 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_UACCESS_H -#define _ASM_IA64_UACCESS_H - -/* - * This file defines various macros to transfer memory areas across - * the user/kernel boundary. This needs to be done carefully because - * this code is executed in kernel mode and uses user-specified - * addresses. Thus, we need to be careful not to let the user to - * trick us into accessing kernel memory that would normally be - * inaccessible. This code is also fairly performance sensitive, - * so we want to spend as little time doing safety checks as - * possible. 
- * - * To make matters a bit more interesting, these macros sometimes also - * called from within the kernel itself, in which case the address - * validity check must be skipped. The get_fs() macro tells us what - * to do: if get_fs()==USER_DS, checking is performed, if - * get_fs()==KERNEL_DS, checking is bypassed. - * - * Note that even if the memory area specified by the user is in a - * valid address range, it is still possible that we'll get a page - * fault while accessing it. This is handled by filling out an - * exception handler fixup entry for each instruction that has the - * potential to fault. When such a fault occurs, the page fault - * handler checks to see whether the faulting instruction has a fixup - * associated and, if so, sets r8 to -EFAULT and clears r9 to 0 and - * then resumes execution at the continuation point. - * - * Based on . - * - * Copyright (C) 1998, 1999, 2001-2004 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include -#include - -#include -#include -#include -#include - -/* - * When accessing user memory, we need to make sure the entire area really is - * in user-level space. We also need to make sure that the address doesn't - * point inside the virtually mapped linear page table. - */ -static inline int __access_ok(const void __user *p, unsigned long size) -{ - unsigned long limit = TASK_SIZE; - unsigned long addr = (unsigned long)p; - - return likely((size <= limit) && (addr <= (limit - size)) && - likely(REGION_OFFSET(addr) < RGN_MAP_LIMIT)); -} -#define __access_ok __access_ok -#include - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. 
- * - * Careful to not - * (a) re-use the arguments for side effects (sizeof/typeof is ok) - * (b) require any knowledge of processes at this stage - */ -#define put_user(x, ptr) __put_user_check((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr))) -#define get_user(x, ptr) __get_user_check((x), (ptr), sizeof(*(ptr))) - -/* - * The "__xxx" versions do not do address space checking, useful when - * doing multiple accesses to the same area (the programmer has to do the - * checks by hand with "access_ok()") - */ -#define __put_user(x, ptr) __put_user_nocheck((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr))) -#define __get_user(x, ptr) __get_user_nocheck((x), (ptr), sizeof(*(ptr))) - -#ifdef ASM_SUPPORTED - struct __large_struct { unsigned long buf[100]; }; -# define __m(x) (*(struct __large_struct __user *)(x)) - -/* We need to declare the __ex_table section before we can use it in .xdata. */ -asm (".section \"__ex_table\", \"a\"\n\t.previous"); - -# define __get_user_size(val, addr, n, err) \ -do { \ - register long __gu_r8 asm ("r8") = 0; \ - register long __gu_r9 asm ("r9"); \ - asm ("\n[1:]\tld"#n" %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ - "[1:]" \ - : "=r"(__gu_r9), "=r"(__gu_r8) : "m"(__m(addr)), "1"(__gu_r8)); \ - (err) = __gu_r8; \ - (val) = __gu_r9; \ -} while (0) - -/* - * The "__put_user_size()" macro tells gcc it reads from memory instead of writing it. This - * is because they do not write to any memory gcc knows about, so there are no aliasing - * issues. 
- */ -# define __put_user_size(val, addr, n, err) \ -do { \ - register long __pu_r8 asm ("r8") = 0; \ - asm volatile ("\n[1:]\tst"#n" %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ - "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ - "[1:]" \ - : "=r"(__pu_r8) : "m"(__m(addr)), "rO"(val), "0"(__pu_r8)); \ - (err) = __pu_r8; \ -} while (0) - -#else /* !ASM_SUPPORTED */ -# define RELOC_TYPE 2 /* ip-rel */ -# define __get_user_size(val, addr, n, err) \ -do { \ - __ld_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE); \ - (err) = ia64_getreg(_IA64_REG_R8); \ - (val) = ia64_getreg(_IA64_REG_R9); \ -} while (0) -# define __put_user_size(val, addr, n, err) \ -do { \ - __st_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE, \ - (__force unsigned long) (val)); \ - (err) = ia64_getreg(_IA64_REG_R8); \ -} while (0) -#endif /* !ASM_SUPPORTED */ - -extern void __get_user_unknown (void); - -/* - * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which - * could clobber r8 and r9 (among others). Thus, be careful not to evaluate it while - * using r8/r9. 
- */ -#define __do_get_user(check, x, ptr, size) \ -({ \ - const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - __typeof__ (size) __gu_size = (size); \ - long __gu_err = -EFAULT; \ - unsigned long __gu_val = 0; \ - if (!check || __access_ok(__gu_ptr, size)) \ - switch (__gu_size) { \ - case 1: __get_user_size(__gu_val, __gu_ptr, 1, __gu_err); break; \ - case 2: __get_user_size(__gu_val, __gu_ptr, 2, __gu_err); break; \ - case 4: __get_user_size(__gu_val, __gu_ptr, 4, __gu_err); break; \ - case 8: __get_user_size(__gu_val, __gu_ptr, 8, __gu_err); break; \ - default: __get_user_unknown(); break; \ - } \ - (x) = (__force __typeof__(*(__gu_ptr))) __gu_val; \ - __gu_err; \ -}) - -#define __get_user_nocheck(x, ptr, size) __do_get_user(0, x, ptr, size) -#define __get_user_check(x, ptr, size) __do_get_user(1, x, ptr, size) - -extern void __put_user_unknown (void); - -/* - * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which - * could clobber r8 (among others). Thus, be careful not to evaluate them while using r8. 
- */ -#define __do_put_user(check, x, ptr, size) \ -({ \ - __typeof__ (x) __pu_x = (x); \ - __typeof__ (*(ptr)) __user *__pu_ptr = (ptr); \ - __typeof__ (size) __pu_size = (size); \ - long __pu_err = -EFAULT; \ - \ - if (!check || __access_ok(__pu_ptr, __pu_size)) \ - switch (__pu_size) { \ - case 1: __put_user_size(__pu_x, __pu_ptr, 1, __pu_err); break; \ - case 2: __put_user_size(__pu_x, __pu_ptr, 2, __pu_err); break; \ - case 4: __put_user_size(__pu_x, __pu_ptr, 4, __pu_err); break; \ - case 8: __put_user_size(__pu_x, __pu_ptr, 8, __pu_err); break; \ - default: __put_user_unknown(); break; \ - } \ - __pu_err; \ -}) - -#define __put_user_nocheck(x, ptr, size) __do_put_user(0, x, ptr, size) -#define __put_user_check(x, ptr, size) __do_put_user(1, x, ptr, size) - -/* - * Complex access routines - */ -extern unsigned long __must_check __copy_user (void __user *to, const void __user *from, - unsigned long count); - -static inline unsigned long -raw_copy_to_user(void __user *to, const void *from, unsigned long count) -{ - return __copy_user(to, (__force void __user *) from, count); -} - -static inline unsigned long -raw_copy_from_user(void *to, const void __user *from, unsigned long count) -{ - return __copy_user((__force void __user *) to, from, count); -} - -#define INLINE_COPY_FROM_USER -#define INLINE_COPY_TO_USER - -extern unsigned long __do_clear_user (void __user *, unsigned long); - -#define __clear_user(to, n) __do_clear_user(to, n) - -#define clear_user(to, n) \ -({ \ - unsigned long __cu_len = (n); \ - if (__access_ok(to, __cu_len)) \ - __cu_len = __do_clear_user(to, __cu_len); \ - __cu_len; \ -}) - - -/* - * Returns: -EFAULT if exception before terminator, N if the entire buffer filled, else - * strlen. 
- */ -extern long __must_check __strncpy_from_user (char *to, const char __user *from, long to_len); - -#define strncpy_from_user(to, from, n) \ -({ \ - const char __user * __sfu_from = (from); \ - long __sfu_ret = -EFAULT; \ - if (__access_ok(__sfu_from, 0)) \ - __sfu_ret = __strncpy_from_user((to), __sfu_from, (n)); \ - __sfu_ret; \ -}) - -/* - * Returns: 0 if exception before NUL or reaching the supplied limit - * (N), a value greater than N if the limit would be exceeded, else - * strlen. - */ -extern unsigned long __strnlen_user (const char __user *, long); - -#define strnlen_user(str, len) \ -({ \ - const char __user *__su_str = (str); \ - unsigned long __su_ret = 0; \ - if (__access_ok(__su_str, 0)) \ - __su_ret = __strnlen_user(__su_str, len); \ - __su_ret; \ -}) - -#define ARCH_HAS_TRANSLATE_MEM_PTR 1 -static __inline__ void * -xlate_dev_mem_ptr(phys_addr_t p) -{ - struct page *page; - void *ptr; - - page = pfn_to_page(p >> PAGE_SHIFT); - if (PageUncached(page)) - ptr = (void *)p + __IA64_UNCACHED_OFFSET; - else - ptr = __va(p); - - return ptr; -} - -#endif /* _ASM_IA64_UACCESS_H */ diff --git a/arch/ia64/include/asm/uncached.h b/arch/ia64/include/asm/uncached.h deleted file mode 100644 index 98f447fc77b7..000000000000 --- a/arch/ia64/include/asm/uncached.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2001-2008 Silicon Graphics, Inc. All rights reserved. - * - * Prototypes for the uncached page allocator - */ - -extern unsigned long uncached_alloc_page(int starting_nid, int n_pages); -extern void uncached_free_page(unsigned long uc_addr, int n_pages); diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h deleted file mode 100644 index 9ba6110b10b9..000000000000 --- a/arch/ia64/include/asm/unistd.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * IA-64 Linux syscall numbers and inline-functions. 
- * - * Copyright (C) 1998-2005 Hewlett-Packard Co - * David Mosberger-Tang - */ -#ifndef _ASM_IA64_UNISTD_H -#define _ASM_IA64_UNISTD_H - -#include - -#define NR_syscalls __NR_syscalls /* length of syscall table */ - -#define __ARCH_WANT_NEW_STAT -#define __ARCH_WANT_SYS_UTIME - -#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER) - -#include -#include -#include - -extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr); - -asmlinkage unsigned long sys_mmap( - unsigned long addr, unsigned long len, - int prot, int flags, - int fd, long off); -asmlinkage unsigned long sys_mmap2( - unsigned long addr, unsigned long len, - int prot, int flags, - int fd, long pgoff); -struct pt_regs; -asmlinkage long sys_ia64_pipe(void); - -#endif /* !__ASSEMBLY__ */ -#endif /* _ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/include/asm/unwind.h b/arch/ia64/include/asm/unwind.h deleted file mode 100644 index c5bd4b3e3a36..000000000000 --- a/arch/ia64/include/asm/unwind.h +++ /dev/null @@ -1,234 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_UNWIND_H -#define _ASM_IA64_UNWIND_H - -/* - * Copyright (C) 1999-2000, 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * A simple API for unwinding kernel stacks. This is used for - * debugging and error reporting purposes. The kernel doesn't need - * full-blown stack unwinding with all the bells and whitles, so there - * is not much point in implementing the full IA-64 unwind API (though - * it would of course be possible to implement the kernel API on top - * of it). 
- */ - -struct task_struct; /* forward declaration */ -struct switch_stack; /* forward declaration */ - -enum unw_application_register { - UNW_AR_BSP, - UNW_AR_BSPSTORE, - UNW_AR_PFS, - UNW_AR_RNAT, - UNW_AR_UNAT, - UNW_AR_LC, - UNW_AR_EC, - UNW_AR_FPSR, - UNW_AR_RSC, - UNW_AR_CCV, - UNW_AR_CSD, - UNW_AR_SSD -}; - -/* - * The following declarations are private to the unwind - * implementation: - */ - -struct unw_stack { - unsigned long limit; - unsigned long top; -}; - -#define UNW_FLAG_INTERRUPT_FRAME (1UL << 0) - -/* - * No user of this module should every access this structure directly - * as it is subject to change. It is declared here solely so we can - * use automatic variables. - */ -struct unw_frame_info { - struct unw_stack regstk; - struct unw_stack memstk; - unsigned int flags; - short hint; - short prev_script; - - /* current frame info: */ - unsigned long bsp; /* backing store pointer value */ - unsigned long sp; /* stack pointer value */ - unsigned long psp; /* previous sp value */ - unsigned long ip; /* instruction pointer value */ - unsigned long pr; /* current predicate values */ - unsigned long *cfm_loc; /* cfm save location (or NULL) */ - unsigned long pt; /* struct pt_regs location */ - - struct task_struct *task; - struct switch_stack *sw; - - /* preserved state: */ - unsigned long *bsp_loc; /* previous bsp save location */ - unsigned long *bspstore_loc; - unsigned long *pfs_loc; - unsigned long *rnat_loc; - unsigned long *rp_loc; - unsigned long *pri_unat_loc; - unsigned long *unat_loc; - unsigned long *pr_loc; - unsigned long *lc_loc; - unsigned long *fpsr_loc; - struct unw_ireg { - unsigned long *loc; - struct unw_ireg_nat { - unsigned long type : 3; /* enum unw_nat_type */ - signed long off : 61; /* NaT word is at loc+nat.off */ - } nat; - } r4, r5, r6, r7; - unsigned long *b1_loc, *b2_loc, *b3_loc, *b4_loc, *b5_loc; - struct ia64_fpreg *f2_loc, *f3_loc, *f4_loc, *f5_loc, *fr_loc[16]; -}; - -/* - * The official API follows below: - */ - 
-struct unw_table_entry { - u64 start_offset; - u64 end_offset; - u64 info_offset; -}; - -/* - * Initialize unwind support. - */ -extern void unw_init (void); - -extern void *unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp, - const void *table_start, const void *table_end); - -extern void unw_remove_unwind_table (void *handle); - -/* - * Prepare to unwind blocked task t. - */ -extern void unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t); - -extern void unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, - struct switch_stack *sw); - -/* - * Prepare to unwind the currently running thread. - */ -extern void unw_init_running (void (*callback)(struct unw_frame_info *info, void *arg), void *arg); - -/* - * Unwind to previous to frame. Returns 0 if successful, negative - * number in case of an error. - */ -extern int unw_unwind (struct unw_frame_info *info); - -/* - * Unwind until the return pointer is in user-land (or until an error - * occurs). Returns 0 if successful, negative number in case of - * error. 
- */ -extern int unw_unwind_to_user (struct unw_frame_info *info); - -#define unw_is_intr_frame(info) (((info)->flags & UNW_FLAG_INTERRUPT_FRAME) != 0) - -static inline int -unw_get_ip (struct unw_frame_info *info, unsigned long *valp) -{ - *valp = (info)->ip; - return 0; -} - -static inline int -unw_get_sp (struct unw_frame_info *info, unsigned long *valp) -{ - *valp = (info)->sp; - return 0; -} - -static inline int -unw_get_psp (struct unw_frame_info *info, unsigned long *valp) -{ - *valp = (info)->psp; - return 0; -} - -static inline int -unw_get_bsp (struct unw_frame_info *info, unsigned long *valp) -{ - *valp = (info)->bsp; - return 0; -} - -static inline int -unw_get_cfm (struct unw_frame_info *info, unsigned long *valp) -{ - *valp = *(info)->cfm_loc; - return 0; -} - -static inline int -unw_set_cfm (struct unw_frame_info *info, unsigned long val) -{ - *(info)->cfm_loc = val; - return 0; -} - -static inline int -unw_get_rp (struct unw_frame_info *info, unsigned long *val) -{ - if (!info->rp_loc) - return -1; - *val = *info->rp_loc; - return 0; -} - -extern int unw_access_gr (struct unw_frame_info *, int, unsigned long *, char *, int); -extern int unw_access_br (struct unw_frame_info *, int, unsigned long *, int); -extern int unw_access_fr (struct unw_frame_info *, int, struct ia64_fpreg *, int); -extern int unw_access_ar (struct unw_frame_info *, int, unsigned long *, int); -extern int unw_access_pr (struct unw_frame_info *, unsigned long *, int); - -static inline int -unw_set_gr (struct unw_frame_info *i, int n, unsigned long v, char nat) -{ - return unw_access_gr(i, n, &v, &nat, 1); -} - -static inline int -unw_set_br (struct unw_frame_info *i, int n, unsigned long v) -{ - return unw_access_br(i, n, &v, 1); -} - -static inline int -unw_set_fr (struct unw_frame_info *i, int n, struct ia64_fpreg v) -{ - return unw_access_fr(i, n, &v, 1); -} - -static inline int -unw_set_ar (struct unw_frame_info *i, int n, unsigned long v) -{ - return unw_access_ar(i, n, &v, 
1); -} - -static inline int -unw_set_pr (struct unw_frame_info *i, unsigned long v) -{ - return unw_access_pr(i, &v, 1); -} - -#define unw_get_gr(i,n,v,nat) unw_access_gr(i,n,v,nat,0) -#define unw_get_br(i,n,v) unw_access_br(i,n,v,0) -#define unw_get_fr(i,n,v) unw_access_fr(i,n,v,0) -#define unw_get_ar(i,n,v) unw_access_ar(i,n,v,0) -#define unw_get_pr(i,v) unw_access_pr(i,v,0) - -#endif /* _ASM_UNWIND_H */ diff --git a/arch/ia64/include/asm/user.h b/arch/ia64/include/asm/user.h deleted file mode 100644 index ec03d3ab8715..000000000000 --- a/arch/ia64/include/asm/user.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_USER_H -#define _ASM_IA64_USER_H - -/* - * Core file format: The core file is written in such a way that gdb - * can understand it and provide useful information to the user (under - * linux we use the `trad-core' bfd). The file contents are as - * follows: - * - * upage: 1 page consisting of a user struct that tells gdb - * what is present in the file. Directly after this is a - * copy of the task_struct, which is currently not used by gdb, - * but it may come in handy at some point. All of the registers - * are stored as part of the upage. The upage should always be - * only one page long. - * data: The data segment follows next. We use current->end_text to - * current->brk to pick up all of the user variables, plus any memory - * that may have been sbrk'ed. No attempt is made to determine if a - * page is demand-zero or if a page is totally unused, we just cover - * the entire range. All of the addresses are rounded in such a way - * that an integral number of pages is written. - * stack: We need the stack information in order to get a meaningful - * backtrace. We need to write the data from usp to - * current->start_stack, so we round each of these in order to be able - * to write an integer number of pages. 
- * - * Modified 1998, 1999, 2001 - * David Mosberger-Tang , Hewlett-Packard Co - */ - -#include -#include - -#include - -#define EF_SIZE 3072 /* XXX fix me */ - -struct user { - unsigned long regs[EF_SIZE/8+32]; /* integer and fp regs */ - size_t u_tsize; /* text size (pages) */ - size_t u_dsize; /* data size (pages) */ - size_t u_ssize; /* stack size (pages) */ - unsigned long start_code; /* text starting address */ - unsigned long start_data; /* data starting address */ - unsigned long start_stack; /* stack starting address */ - long int signal; /* signal causing core dump */ - unsigned long u_ar0; /* help gdb find registers */ - unsigned long magic; /* identifies a core file */ - char u_comm[32]; /* user command name */ -}; - -#endif /* _ASM_IA64_USER_H */ diff --git a/arch/ia64/include/asm/ustack.h b/arch/ia64/include/asm/ustack.h deleted file mode 100644 index 112d40a0fec2..000000000000 --- a/arch/ia64/include/asm/ustack.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_USTACK_H -#define _ASM_IA64_USTACK_H - -#include -#include - -/* The absolute hard limit for stack size is 1/2 of the mappable space in the region */ -#define MAX_USER_STACK_SIZE (RGN_MAP_LIMIT/2) -#define STACK_TOP (0x6000000000000000UL + RGN_MAP_LIMIT) -#define STACK_TOP_MAX STACK_TOP -#endif /* _ASM_IA64_USTACK_H */ diff --git a/arch/ia64/include/asm/uv/uv.h b/arch/ia64/include/asm/uv/uv.h deleted file mode 100644 index 48d4526bf4cd..000000000000 --- a/arch/ia64/include/asm/uv/uv.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_UV_UV_H -#define _ASM_IA64_UV_UV_H - -#ifdef CONFIG_IA64_SGI_UV -extern bool ia64_is_uv; - -static inline int is_uv_system(void) -{ - return ia64_is_uv; -} - -void __init uv_probe_system_type(void); -void __init uv_setup(char **cmdline_p); -#else /* CONFIG_IA64_SGI_UV */ -static inline int is_uv_system(void) -{ - return false; -} - -static inline void __init 
uv_probe_system_type(void) -{ -} - -static inline void __init uv_setup(char **cmdline_p) -{ -} -#endif /* CONFIG_IA64_SGI_UV */ - -#endif /* _ASM_IA64_UV_UV_H */ diff --git a/arch/ia64/include/asm/uv/uv_hub.h b/arch/ia64/include/asm/uv/uv_hub.h deleted file mode 100644 index 809ddb6896db..000000000000 --- a/arch/ia64/include/asm/uv/uv_hub.h +++ /dev/null @@ -1,315 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * SGI UV architectural definitions - * - * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. - */ - -#ifndef __ASM_IA64_UV_HUB_H__ -#define __ASM_IA64_UV_HUB_H__ - -#include -#include -#include -#include - - -/* - * Addressing Terminology - * - * M - The low M bits of a physical address represent the offset - * into the blade local memory. RAM memory on a blade is physically - * contiguous (although various IO spaces may punch holes in - * it).. - * - * N - Number of bits in the node portion of a socket physical - * address. - * - * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of - * routers always have low bit of 1, C/MBricks have low bit - * equal to 0. Most addressing macros that target UV hub chips - * right shift the NASID by 1 to exclude the always-zero bit. - * NASIDs contain up to 15 bits. - * - * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead - * of nasids. - * - * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant - * of the nasid for socket usage. - * - * - * NumaLink Global Physical Address Format: - * +--------------------------------+---------------------+ - * |00..000| GNODE | NodeOffset | - * +--------------------------------+---------------------+ - * |<-------53 - M bits --->|<--------M bits -----> - * - * M - number of node offset bits (35 .. 
40) - * - * - * Memory/UV-HUB Processor Socket Address Format: - * +----------------+---------------+---------------------+ - * |00..000000000000| PNODE | NodeOffset | - * +----------------+---------------+---------------------+ - * <--- N bits --->|<--------M bits -----> - * - * M - number of node offset bits (35 .. 40) - * N - number of PNODE bits (0 .. 10) - * - * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64). - * The actual values are configuration dependent and are set at - * boot time. M & N values are set by the hardware/BIOS at boot. - */ - - -/* - * Maximum number of bricks in all partitions and in all coherency domains. - * This is the total number of bricks accessible in the numalink fabric. It - * includes all C & M bricks. Routers are NOT included. - * - * This value is also the value of the maximum number of non-router NASIDs - * in the numalink fabric. - * - * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused. - */ -#define UV_MAX_NUMALINK_BLADES 16384 - -/* - * Maximum number of C/Mbricks within a software SSI (hardware may support - * more). - */ -#define UV_MAX_SSI_BLADES 1 - -/* - * The largest possible NASID of a C or M brick (+ 2) - */ -#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) - -/* - * The following defines attributes of the HUB chip. These attributes are - * frequently referenced and are kept in the per-cpu data areas of each cpu. - * They are kept together in a struct to minimize cache misses. 
- */ -struct uv_hub_info_s { - unsigned long global_mmr_base; - unsigned long gpa_mask; - unsigned long gnode_upper; - unsigned long lowmem_remap_top; - unsigned long lowmem_remap_base; - unsigned short pnode; - unsigned short pnode_mask; - unsigned short coherency_domain_number; - unsigned short numa_blade_id; - unsigned char blade_processor_id; - unsigned char m_val; - unsigned char n_val; -}; -DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info this_cpu_ptr(&__uv_hub_info) -#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) - -/* - * Local & Global MMR space macros. - * Note: macros are intended to be used ONLY by inline functions - * in this file - not by other kernel code. - * n - NASID (full 15-bit global nasid) - * g - GNODE (full 15-bit global nasid, right shifted 1) - * p - PNODE (local part of nsids, right shifted 1) - */ -#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) -#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper) - -#define UV_LOCAL_MMR_BASE 0xf4000000UL -#define UV_GLOBAL_MMR32_BASE 0xf8000000UL -#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) - -#define UV_GLOBAL_MMR32_PNODE_SHIFT 15 -#define UV_GLOBAL_MMR64_PNODE_SHIFT 26 - -#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) - -#define UV_GLOBAL_MMR64_PNODE_BITS(p) \ - ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT) - -/* - * Macros for converting between kernel virtual addresses, socket local physical - * addresses, and UV global physical addresses. - * Note: use the standard __pa() & __va() macros for converting - * between socket virtual and socket physical addresses. 
- */ - -/* socket phys RAM --> UV global physical address */ -static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) -{ - if (paddr < uv_hub_info->lowmem_remap_top) - paddr += uv_hub_info->lowmem_remap_base; - return paddr | uv_hub_info->gnode_upper; -} - - -/* socket virtual --> UV global physical address */ -static inline unsigned long uv_gpa(void *v) -{ - return __pa(v) | uv_hub_info->gnode_upper; -} - -/* socket virtual --> UV global physical address */ -static inline void *uv_vgpa(void *v) -{ - return (void *)uv_gpa(v); -} - -/* UV global physical address --> socket virtual */ -static inline void *uv_va(unsigned long gpa) -{ - return __va(gpa & uv_hub_info->gpa_mask); -} - -/* pnode, offset --> socket virtual */ -static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) -{ - return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset); -} - - -/* - * Access global MMRs using the low memory MMR32 space. This region supports - * faster MMR access but not all MMRs are accessible in this space. - */ -static inline unsigned long *uv_global_mmr32_address(int pnode, - unsigned long offset) -{ - return __va(UV_GLOBAL_MMR32_BASE | - UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); -} - -static inline void uv_write_global_mmr32(int pnode, unsigned long offset, - unsigned long val) -{ - *uv_global_mmr32_address(pnode, offset) = val; -} - -static inline unsigned long uv_read_global_mmr32(int pnode, - unsigned long offset) -{ - return *uv_global_mmr32_address(pnode, offset); -} - -/* - * Access Global MMR space using the MMR space located at the top of physical - * memory. 
- */ -static inline unsigned long *uv_global_mmr64_address(int pnode, - unsigned long offset) -{ - return __va(UV_GLOBAL_MMR64_BASE | - UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); -} - -static inline void uv_write_global_mmr64(int pnode, unsigned long offset, - unsigned long val) -{ - *uv_global_mmr64_address(pnode, offset) = val; -} - -static inline unsigned long uv_read_global_mmr64(int pnode, - unsigned long offset) -{ - return *uv_global_mmr64_address(pnode, offset); -} - -/* - * Access hub local MMRs. Faster than using global space but only local MMRs - * are accessible. - */ -static inline unsigned long *uv_local_mmr_address(unsigned long offset) -{ - return __va(UV_LOCAL_MMR_BASE | offset); -} - -static inline unsigned long uv_read_local_mmr(unsigned long offset) -{ - return *uv_local_mmr_address(offset); -} - -static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) -{ - *uv_local_mmr_address(offset) = val; -} - -/* - * Structures and definitions for converting between cpu, node, pnode, and blade - * numbers. - */ - -/* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */ -static inline int uv_blade_processor_id(void) -{ - return smp_processor_id(); -} - -/* Blade number of current cpu. Numnbered 0 .. 
<#blades -1> */ -static inline int uv_numa_blade_id(void) -{ - return 0; -} - -/* Convert a cpu number to the UV blade number */ -static inline int uv_cpu_to_blade_id(int cpu) -{ - return 0; -} - -/* Convert linux node number to the UV blade number */ -static inline int uv_node_to_blade_id(int nid) -{ - return 0; -} - -/* Convert a blade id to the PNODE of the blade */ -static inline int uv_blade_to_pnode(int bid) -{ - return 0; -} - -/* Determine the number of possible cpus on a blade */ -static inline int uv_blade_nr_possible_cpus(int bid) -{ - return num_possible_cpus(); -} - -/* Determine the number of online cpus on a blade */ -static inline int uv_blade_nr_online_cpus(int bid) -{ - return num_online_cpus(); -} - -/* Convert a cpu id to the PNODE of the blade containing the cpu */ -static inline int uv_cpu_to_pnode(int cpu) -{ - return 0; -} - -/* Convert a linux node number to the PNODE of the blade */ -static inline int uv_node_to_pnode(int nid) -{ - return 0; -} - -/* Maximum possible number of blades */ -static inline int uv_num_possible_blades(void) -{ - return 1; -} - -static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) -{ - /* not currently needed on ia64 */ -} - - -#endif /* __ASM_IA64_UV_HUB__ */ - diff --git a/arch/ia64/include/asm/uv/uv_mmrs.h b/arch/ia64/include/asm/uv/uv_mmrs.h deleted file mode 100644 index fe0b8f05e1a8..000000000000 --- a/arch/ia64/include/asm/uv/uv_mmrs.h +++ /dev/null @@ -1,825 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * SGI UV MMR definitions - * - * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. 
- */ - -#ifndef _ASM_IA64_UV_UV_MMRS_H -#define _ASM_IA64_UV_UV_MMRS_H - -#define UV_MMR_ENABLE (1UL << 63) - -/* ========================================================================= */ -/* UVH_BAU_DATA_CONFIG */ -/* ========================================================================= */ -#define UVH_BAU_DATA_CONFIG 0x61680UL -#define UVH_BAU_DATA_CONFIG_32 0x0438 - -#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 -#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 -#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 -#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 -#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_BAU_DATA_CONFIG_P_SHFT 13 -#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_BAU_DATA_CONFIG_T_SHFT 15 -#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_BAU_DATA_CONFIG_M_SHFT 16 -#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 -#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_bau_data_config_u { - unsigned long v; - struct uvh_bau_data_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_EVENT_OCCURRED0 */ -/* ========================================================================= */ -#define UVH_EVENT_OCCURRED0 0x70000UL -#define UVH_EVENT_OCCURRED0_32 0x005e8 - -#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 -#define 
UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL -#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 -#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL -#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 -#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL -#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3 -#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL -#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4 -#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL -#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5 -#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL -#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6 -#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL -#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 -#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL -#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 -#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL -#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 -#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL -#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 -#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL -#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 -#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL -#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 -#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL -#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 -#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL -#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 -#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL -#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 -#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL -#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 -#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL -#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 -#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL -#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 
-#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL -#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 -#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL -#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 -#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL -#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 -#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL -#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 -#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL -#define 
UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL -#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 -#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL -#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 -#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL -#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 -#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL -#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 -#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL -#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43 -#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL -#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 -#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL -#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45 -#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 -#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 -#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 -#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 -#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL -#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 -#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL -#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51 -#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL -#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52 -#define 
UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL -#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53 -#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL -#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54 -#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL -#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55 -#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL -#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 -#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL -union uvh_event_occurred0_u { - unsigned long v; - struct uvh_event_occurred0_s { - unsigned long lb_hcerr : 1; /* RW, W1C */ - unsigned long gr0_hcerr : 1; /* RW, W1C */ - unsigned long gr1_hcerr : 1; /* RW, W1C */ - unsigned long lh_hcerr : 1; /* RW, W1C */ - unsigned long rh_hcerr : 1; /* RW, W1C */ - unsigned long xn_hcerr : 1; /* RW, W1C */ - unsigned long si_hcerr : 1; /* RW, W1C */ - unsigned long lb_aoerr0 : 1; /* RW, W1C */ - unsigned long gr0_aoerr0 : 1; /* RW, W1C */ - unsigned long gr1_aoerr0 : 1; /* RW, W1C */ - unsigned long lh_aoerr0 : 1; /* RW, W1C */ - unsigned long rh_aoerr0 : 1; /* RW, W1C */ - unsigned long xn_aoerr0 : 1; /* RW, W1C */ - unsigned long si_aoerr0 : 1; /* RW, W1C */ - unsigned long lb_aoerr1 : 1; /* RW, W1C */ - unsigned long gr0_aoerr1 : 1; /* RW, W1C */ - unsigned long gr1_aoerr1 : 1; /* RW, W1C */ - unsigned long lh_aoerr1 : 1; /* RW, W1C */ - unsigned long rh_aoerr1 : 1; /* RW, W1C */ - unsigned long xn_aoerr1 : 1; /* RW, W1C */ - unsigned long si_aoerr1 : 1; /* RW, W1C */ - unsigned long rh_vpi_int : 1; /* RW, W1C */ - unsigned long system_shutdown_int : 1; /* RW, W1C */ - unsigned long lb_irq_int_0 : 1; /* RW, W1C */ - unsigned long lb_irq_int_1 : 1; /* RW, W1C */ - unsigned long lb_irq_int_2 : 1; /* RW, W1C */ - unsigned long lb_irq_int_3 : 1; /* RW, W1C */ - unsigned long lb_irq_int_4 : 1; /* RW, W1C */ - unsigned long lb_irq_int_5 : 1; /* RW, W1C */ - unsigned long lb_irq_int_6 : 1; /* RW, W1C */ - unsigned long lb_irq_int_7 : 1; /* 
RW, W1C */ - unsigned long lb_irq_int_8 : 1; /* RW, W1C */ - unsigned long lb_irq_int_9 : 1; /* RW, W1C */ - unsigned long lb_irq_int_10 : 1; /* RW, W1C */ - unsigned long lb_irq_int_11 : 1; /* RW, W1C */ - unsigned long lb_irq_int_12 : 1; /* RW, W1C */ - unsigned long lb_irq_int_13 : 1; /* RW, W1C */ - unsigned long lb_irq_int_14 : 1; /* RW, W1C */ - unsigned long lb_irq_int_15 : 1; /* RW, W1C */ - unsigned long l1_nmi_int : 1; /* RW, W1C */ - unsigned long stop_clock : 1; /* RW, W1C */ - unsigned long asic_to_l1 : 1; /* RW, W1C */ - unsigned long l1_to_asic : 1; /* RW, W1C */ - unsigned long ltc_int : 1; /* RW, W1C */ - unsigned long la_seq_trigger : 1; /* RW, W1C */ - unsigned long ipi_int : 1; /* RW, W1C */ - unsigned long extio_int0 : 1; /* RW, W1C */ - unsigned long extio_int1 : 1; /* RW, W1C */ - unsigned long extio_int2 : 1; /* RW, W1C */ - unsigned long extio_int3 : 1; /* RW, W1C */ - unsigned long profile_int : 1; /* RW, W1C */ - unsigned long rtc0 : 1; /* RW, W1C */ - unsigned long rtc1 : 1; /* RW, W1C */ - unsigned long rtc2 : 1; /* RW, W1C */ - unsigned long rtc3 : 1; /* RW, W1C */ - unsigned long bau_data : 1; /* RW, W1C */ - unsigned long power_management_req : 1; /* RW, W1C */ - unsigned long rsvd_57_63 : 7; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_EVENT_OCCURRED0_ALIAS */ -/* ========================================================================= */ -#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL -#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0 - -/* ========================================================================= */ -/* UVH_GR0_TLB_INT0_CONFIG */ -/* ========================================================================= */ -#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL - -#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 -#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 
0x0000000000000700UL -#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13 -#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15 -#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16 -#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_gr0_tlb_int0_config_u { - unsigned long v; - struct uvh_gr0_tlb_int0_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_GR0_TLB_INT1_CONFIG */ -/* ========================================================================= */ -#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL - -#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 -#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 -#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 -#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 -#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13 -#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15 -#define 
UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16 -#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 -#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_gr0_tlb_int1_config_u { - unsigned long v; - struct uvh_gr0_tlb_int1_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_GR1_TLB_INT0_CONFIG */ -/* ========================================================================= */ -#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL - -#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 -#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13 -#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15 -#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16 -#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_gr1_tlb_int0_config_u { - unsigned long v; - struct uvh_gr1_tlb_int0_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm 
: 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_GR1_TLB_INT1_CONFIG */ -/* ========================================================================= */ -#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL - -#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 -#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 -#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 -#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 -#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13 -#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15 -#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16 -#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 -#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_gr1_tlb_int1_config_u { - unsigned long v; - struct uvh_gr1_tlb_int1_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_INT_CMPB */ -/* 
========================================================================= */ -#define UVH_INT_CMPB 0x22080UL - -#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 -#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL - -union uvh_int_cmpb_u { - unsigned long v; - struct uvh_int_cmpb_s { - unsigned long real_time_cmpb : 56; /* RW */ - unsigned long rsvd_56_63 : 8; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_INT_CMPC */ -/* ========================================================================= */ -#define UVH_INT_CMPC 0x22100UL - -#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 -#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL - -union uvh_int_cmpc_u { - unsigned long v; - struct uvh_int_cmpc_s { - unsigned long real_time_cmpc : 56; /* RW */ - unsigned long rsvd_56_63 : 8; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_INT_CMPD */ -/* ========================================================================= */ -#define UVH_INT_CMPD 0x22180UL - -#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 -#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL - -union uvh_int_cmpd_u { - unsigned long v; - struct uvh_int_cmpd_s { - unsigned long real_time_cmpd : 56; /* RW */ - unsigned long rsvd_56_63 : 8; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_NODE_ID */ -/* ========================================================================= */ -#define UVH_NODE_ID 0x0UL - -#define UVH_NODE_ID_FORCE1_SHFT 0 -#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL -#define UVH_NODE_ID_MANUFACTURER_SHFT 1 -#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL -#define UVH_NODE_ID_PART_NUMBER_SHFT 12 -#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL -#define UVH_NODE_ID_REVISION_SHFT 28 -#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL -#define UVH_NODE_ID_NODE_ID_SHFT 32 
-#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UVH_NODE_ID_NODES_PER_BIT_SHFT 48 -#define UVH_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL -#define UVH_NODE_ID_NI_PORT_SHFT 56 -#define UVH_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL - -union uvh_node_id_u { - unsigned long v; - struct uvh_node_id_s { - unsigned long force1 : 1; /* RO */ - unsigned long manufacturer : 11; /* RO */ - unsigned long part_number : 16; /* RO */ - unsigned long revision : 4; /* RO */ - unsigned long node_id : 15; /* RW */ - unsigned long rsvd_47 : 1; /* */ - unsigned long nodes_per_bit : 7; /* RW */ - unsigned long rsvd_55 : 1; /* */ - unsigned long ni_port : 4; /* RO */ - unsigned long rsvd_60_63 : 4; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ -/* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL - -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -union uvh_rh_gam_alias210_redirect_config_0_mmr_u { - unsigned long v; - struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { - unsigned long rsvd_0_23 : 24; /* */ - unsigned long dest_base : 22; /* RW */ - unsigned long rsvd_46_63: 18; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ -/* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL - -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -union uvh_rh_gam_alias210_redirect_config_1_mmr_u { - unsigned long v; - struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { - unsigned long 
rsvd_0_23 : 24; /* */ - unsigned long dest_base : 22; /* RW */ - unsigned long rsvd_46_63: 18; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ -/* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL - -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -union uvh_rh_gam_alias210_redirect_config_2_mmr_u { - unsigned long v; - struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { - unsigned long rsvd_0_23 : 24; /* */ - unsigned long dest_base : 22; /* RW */ - unsigned long rsvd_46_63: 18; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ -/* ========================================================================= */ -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL - -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -union uvh_rh_gam_gru_overlay_config_mmr_u { - unsigned long v; - struct uvh_rh_gam_gru_overlay_config_mmr_s { - unsigned long rsvd_0_27: 28; /* */ - unsigned long base : 18; /* RW */ - unsigned long rsvd_46_47: 2; /* */ - unsigned long gr4 : 1; /* RW */ - unsigned long rsvd_49_51: 3; /* */ - unsigned long n_gru : 4; /* RW */ - unsigned long rsvd_56_62: 7; /* */ - unsigned long enable : 1; /* RW */ - } s; 
-}; - -/* ========================================================================= */ -/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ -/* ========================================================================= */ -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL - -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL - -union uvh_rh_gam_mmr_overlay_config_mmr_u { - unsigned long v; - struct uvh_rh_gam_mmr_overlay_config_mmr_s { - unsigned long rsvd_0_25: 26; /* */ - unsigned long base : 20; /* RW */ - unsigned long dual_hub : 1; /* RW */ - unsigned long rsvd_47_62: 16; /* */ - unsigned long enable : 1; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RTC */ -/* ========================================================================= */ -#define UVH_RTC 0x340000UL - -#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 -#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL - -union uvh_rtc_u { - unsigned long v; - struct uvh_rtc_s { - unsigned long real_time_clock : 56; /* RW */ - unsigned long rsvd_56_63 : 8; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RTC1_INT_CONFIG */ -/* ========================================================================= */ -#define UVH_RTC1_INT_CONFIG 0x615c0UL - -#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 -#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 -#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 -#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define 
UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 -#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_RTC1_INT_CONFIG_P_SHFT 13 -#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_RTC1_INT_CONFIG_T_SHFT 15 -#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_RTC1_INT_CONFIG_M_SHFT 16 -#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 -#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_rtc1_int_config_u { - unsigned long v; - struct uvh_rtc1_int_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RTC2_INT_CONFIG */ -/* ========================================================================= */ -#define UVH_RTC2_INT_CONFIG 0x61600UL - -#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0 -#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_RTC2_INT_CONFIG_DM_SHFT 8 -#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11 -#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12 -#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_RTC2_INT_CONFIG_P_SHFT 13 -#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_RTC2_INT_CONFIG_T_SHFT 15 -#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_RTC2_INT_CONFIG_M_SHFT 16 -#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32 -#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union 
uvh_rtc2_int_config_u { - unsigned long v; - struct uvh_rtc2_int_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_RTC3_INT_CONFIG */ -/* ========================================================================= */ -#define UVH_RTC3_INT_CONFIG 0x61640UL - -#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0 -#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVH_RTC3_INT_CONFIG_DM_SHFT 8 -#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL -#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11 -#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12 -#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVH_RTC3_INT_CONFIG_P_SHFT 13 -#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL -#define UVH_RTC3_INT_CONFIG_T_SHFT 15 -#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL -#define UVH_RTC3_INT_CONFIG_M_SHFT 16 -#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL -#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32 -#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -union uvh_rtc3_int_config_u { - unsigned long v; - struct uvh_rtc3_int_config_s { - unsigned long vector_ : 8; /* RW */ - unsigned long dm : 3; /* RW */ - unsigned long destmode : 1; /* RW */ - unsigned long status : 1; /* RO */ - unsigned long p : 1; /* RO */ - unsigned long rsvd_14 : 1; /* */ - unsigned long t : 1; /* RO */ - unsigned long m : 1; /* RW */ - unsigned long rsvd_17_31: 15; /* */ - unsigned long apic_id : 32; /* RW */ - } s; -}; - -/* 
========================================================================= */ -/* UVH_RTC_INC_RATIO */ -/* ========================================================================= */ -#define UVH_RTC_INC_RATIO 0x350000UL - -#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0 -#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL -#define UVH_RTC_INC_RATIO_RATIO_SHFT 20 -#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL - -union uvh_rtc_inc_ratio_u { - unsigned long v; - struct uvh_rtc_inc_ratio_s { - unsigned long fraction : 20; /* RW */ - unsigned long ratio : 3; /* RW */ - unsigned long rsvd_23_63: 41; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_SI_ADDR_MAP_CONFIG */ -/* ========================================================================= */ -#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL - -#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_SHFT 0 -#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL -#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_SHFT 8 -#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_MASK 0x0000000000000f00UL - -union uvh_si_addr_map_config_u { - unsigned long v; - struct uvh_si_addr_map_config_s { - unsigned long m_skt : 6; /* RW */ - unsigned long rsvd_6_7: 2; /* */ - unsigned long n_skt : 4; /* RW */ - unsigned long rsvd_12_63: 52; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_SI_ALIAS0_OVERLAY_CONFIG */ -/* ========================================================================= */ -#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL - -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24 -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63 -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL - -union uvh_si_alias0_overlay_config_u 
{ - unsigned long v; - struct uvh_si_alias0_overlay_config_s { - unsigned long rsvd_0_23: 24; /* */ - unsigned long base : 8; /* RW */ - unsigned long rsvd_32_47: 16; /* */ - unsigned long m_alias : 5; /* RW */ - unsigned long rsvd_53_62: 10; /* */ - unsigned long enable : 1; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_SI_ALIAS1_OVERLAY_CONFIG */ -/* ========================================================================= */ -#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL - -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24 -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63 -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL - -union uvh_si_alias1_overlay_config_u { - unsigned long v; - struct uvh_si_alias1_overlay_config_s { - unsigned long rsvd_0_23: 24; /* */ - unsigned long base : 8; /* RW */ - unsigned long rsvd_32_47: 16; /* */ - unsigned long m_alias : 5; /* RW */ - unsigned long rsvd_53_62: 10; /* */ - unsigned long enable : 1; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_SI_ALIAS2_OVERLAY_CONFIG */ -/* ========================================================================= */ -#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL - -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24 -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63 -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL - -union uvh_si_alias2_overlay_config_u { - unsigned long v; - struct uvh_si_alias2_overlay_config_s { - unsigned long 
rsvd_0_23: 24; /* */ - unsigned long base : 8; /* RW */ - unsigned long rsvd_32_47: 16; /* */ - unsigned long m_alias : 5; /* RW */ - unsigned long rsvd_53_62: 10; /* */ - unsigned long enable : 1; /* RW */ - } s; -}; - - -#endif /* _ASM_IA64_UV_UV_MMRS_H */ diff --git a/arch/ia64/include/asm/vermagic.h b/arch/ia64/include/asm/vermagic.h deleted file mode 100644 index 29c7424f4c25..000000000000 --- a/arch/ia64/include/asm/vermagic.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#ifndef _ASM_VERMAGIC_H -#define _ASM_VERMAGIC_H - -#include - -#define MODULE_ARCH_VERMAGIC "ia64" \ - "gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__) - -#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/ia64/include/asm/vga.h b/arch/ia64/include/asm/vga.h deleted file mode 100644 index 64ce0b971a0a..000000000000 --- a/arch/ia64/include/asm/vga.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Access to VGA videoram - * - * (c) 1998 Martin Mares - * (c) 1999 Asit Mallick - * (c) 1999 Don Dugger - */ - -#ifndef __ASM_IA64_VGA_H_ -#define __ASM_IA64_VGA_H_ - -/* - * On the PC, we can just recalculate addresses and then access the - * videoram directly without any black magic. 
- */ - -extern unsigned long vga_console_iobase; -extern unsigned long vga_console_membase; - -#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap(vga_console_membase + (x), s)) - -#define vga_readb(x) (*(x)) -#define vga_writeb(x,y) (*(y) = (x)) - -#endif /* __ASM_IA64_VGA_H_ */ diff --git a/arch/ia64/include/asm/vmalloc.h b/arch/ia64/include/asm/vmalloc.h deleted file mode 100644 index a2b51141ad28..000000000000 --- a/arch/ia64/include/asm/vmalloc.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _ASM_IA64_VMALLOC_H -#define _ASM_IA64_VMALLOC_H - -#endif /* _ASM_IA64_VMALLOC_H */ diff --git a/arch/ia64/include/asm/xor.h b/arch/ia64/include/asm/xor.h deleted file mode 100644 index 6785f70d3208..000000000000 --- a/arch/ia64/include/asm/xor.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Optimized RAID-5 checksumming functions for IA-64. - */ - - -extern void xor_ia64_2(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2); -extern void xor_ia64_3(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3); -extern void xor_ia64_4(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4); -extern void xor_ia64_5(unsigned long bytes, unsigned long * __restrict p1, - const unsigned long * __restrict p2, - const unsigned long * __restrict p3, - const unsigned long * __restrict p4, - const unsigned long * __restrict p5); - -static struct xor_block_template xor_block_ia64 = { - .name = "ia64", - .do_2 = xor_ia64_2, - .do_3 = xor_ia64_3, - .do_4 = xor_ia64_4, - .do_5 = xor_ia64_5, -}; - -#define XOR_TRY_TEMPLATES xor_speed(&xor_block_ia64) diff --git a/arch/ia64/include/asm/xtp.h b/arch/ia64/include/asm/xtp.h deleted file mode 100644 index 5bf1d70ad860..000000000000 --- a/arch/ia64/include/asm/xtp.h +++ /dev/null @@ -1,46 +0,0 
@@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_XTP_H -#define _ASM_IA64_XTP_H - -#include - -#ifdef CONFIG_SMP - -#define XTP_OFFSET 0x1e0008 - -#define SMP_IRQ_REDIRECTION (1 << 0) -#define SMP_IPI_REDIRECTION (1 << 1) - -extern unsigned char smp_int_redirect; - -/* - * XTP control functions: - * min_xtp : route all interrupts to this CPU - * normal_xtp: nominal XTP value - * max_xtp : never deliver interrupts to this CPU. - */ - -static inline void -min_xtp (void) -{ - if (smp_int_redirect & SMP_IRQ_REDIRECTION) - writeb(0x00, ipi_base_addr + XTP_OFFSET); /* XTP to min */ -} - -static inline void -normal_xtp (void) -{ - if (smp_int_redirect & SMP_IRQ_REDIRECTION) - writeb(0x08, ipi_base_addr + XTP_OFFSET); /* XTP normal */ -} - -static inline void -max_xtp (void) -{ - if (smp_int_redirect & SMP_IRQ_REDIRECTION) - writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */ -} - -#endif /* CONFIG_SMP */ - -#endif /* _ASM_IA64_XTP_Hy */ diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild deleted file mode 100644 index 3a1341e3535a..000000000000 --- a/arch/ia64/include/uapi/asm/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -generated-y += unistd_64.h diff --git a/arch/ia64/include/uapi/asm/auxvec.h b/arch/ia64/include/uapi/asm/auxvec.h deleted file mode 100644 index 09969a5d2e0a..000000000000 --- a/arch/ia64/include/uapi/asm/auxvec.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_AUXVEC_H -#define _ASM_IA64_AUXVEC_H - -/* - * Architecture-neutral AT_ values are in the range 0-17. Leave some room for more of - * them, start the architecture-specific ones at 32. 
- */ -#define AT_SYSINFO 32 -#define AT_SYSINFO_EHDR 33 - -#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */ - -#endif /* _ASM_IA64_AUXVEC_H */ diff --git a/arch/ia64/include/uapi/asm/bitsperlong.h b/arch/ia64/include/uapi/asm/bitsperlong.h deleted file mode 100644 index 1146d55563db..000000000000 --- a/arch/ia64/include/uapi/asm/bitsperlong.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __ASM_IA64_BITSPERLONG_H -#define __ASM_IA64_BITSPERLONG_H - -#define __BITS_PER_LONG 64 - -#include - -#endif /* __ASM_IA64_BITSPERLONG_H */ diff --git a/arch/ia64/include/uapi/asm/break.h b/arch/ia64/include/uapi/asm/break.h deleted file mode 100644 index 4ca110f0a94b..000000000000 --- a/arch/ia64/include/uapi/asm/break.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_BREAK_H -#define _ASM_IA64_BREAK_H - -/* - * IA-64 Linux break numbers. - * - * Copyright (C) 1999 Hewlett-Packard Co - * Copyright (C) 1999 David Mosberger-Tang - */ - -/* - * OS-specific debug break numbers: - */ -#define __IA64_BREAK_KDB 0x80100 -#define __IA64_BREAK_KPROBE 0x81000 /* .. 
0x81fff */ - -/* - * OS-specific break numbers: - */ -#define __IA64_BREAK_SYSCALL 0x100000 - -#endif /* _ASM_IA64_BREAK_H */ diff --git a/arch/ia64/include/uapi/asm/byteorder.h b/arch/ia64/include/uapi/asm/byteorder.h deleted file mode 100644 index f85d0faaaf34..000000000000 --- a/arch/ia64/include/uapi/asm/byteorder.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_BYTEORDER_H -#define _ASM_IA64_BYTEORDER_H - -#include - -#endif /* _ASM_IA64_BYTEORDER_H */ diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h deleted file mode 100644 index a59b5de6eec6..000000000000 --- a/arch/ia64/include/uapi/asm/cmpxchg.h +++ /dev/null @@ -1,138 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_IA64_CMPXCHG_H -#define _UAPI_ASM_IA64_CMPXCHG_H - -/* - * Compare/Exchange, forked from asm/intrinsics.h - * which was: - * - * Copyright (C) 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#ifndef __ASSEMBLY__ - -#include -/* include compiler specific intrinsics */ -#include -#include - -/* - * This function doesn't exist, so you'll get a linker error if - * something tries to do an invalid xchg(). 
- */ -extern void ia64_xchg_called_with_bad_pointer(void); - -#define __arch_xchg(x, ptr, size) \ -({ \ - unsigned long __xchg_result; \ - \ - switch (size) { \ - case 1: \ - __xchg_result = ia64_xchg1((__u8 __force *)ptr, x); \ - break; \ - \ - case 2: \ - __xchg_result = ia64_xchg2((__u16 __force *)ptr, x); \ - break; \ - \ - case 4: \ - __xchg_result = ia64_xchg4((__u32 __force *)ptr, x); \ - break; \ - \ - case 8: \ - __xchg_result = ia64_xchg8((__u64 __force *)ptr, x); \ - break; \ - default: \ - ia64_xchg_called_with_bad_pointer(); \ - } \ - (__typeof__ (*(ptr)) __force) __xchg_result; \ -}) - -#ifndef __KERNEL__ -#define xchg(ptr, x) \ -({(__typeof__(*(ptr))) __arch_xchg((unsigned long) (x), (ptr), sizeof(*(ptr)));}) -#endif - -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. - */ - -/* - * This function doesn't exist, so you'll get a linker error - * if something tries to do an invalid cmpxchg(). 
- */ -extern long ia64_cmpxchg_called_with_bad_pointer(void); - -#define ia64_cmpxchg(sem, ptr, old, new, size) \ -({ \ - __u64 _o_, _r_; \ - \ - switch (size) { \ - case 1: \ - _o_ = (__u8) (long __force) (old); \ - break; \ - case 2: \ - _o_ = (__u16) (long __force) (old); \ - break; \ - case 4: \ - _o_ = (__u32) (long __force) (old); \ - break; \ - case 8: \ - _o_ = (__u64) (long __force) (old); \ - break; \ - default: \ - break; \ - } \ - switch (size) { \ - case 1: \ - _r_ = ia64_cmpxchg1_##sem((__u8 __force *) ptr, new, _o_); \ - break; \ - \ - case 2: \ - _r_ = ia64_cmpxchg2_##sem((__u16 __force *) ptr, new, _o_); \ - break; \ - \ - case 4: \ - _r_ = ia64_cmpxchg4_##sem((__u32 __force *) ptr, new, _o_); \ - break; \ - \ - case 8: \ - _r_ = ia64_cmpxchg8_##sem((__u64 __force *) ptr, new, _o_); \ - break; \ - \ - default: \ - _r_ = ia64_cmpxchg_called_with_bad_pointer(); \ - break; \ - } \ - (__typeof__(old) __force) _r_; \ -}) - -#define cmpxchg_acq(ptr, o, n) \ - ia64_cmpxchg(acq, (ptr), (o), (n), sizeof(*(ptr))) -#define cmpxchg_rel(ptr, o, n) \ - ia64_cmpxchg(rel, (ptr), (o), (n), sizeof(*(ptr))) - -/* - * Worse still - early processor implementations actually just ignored - * the acquire/release and did a full fence all the time. Unfortunately - * this meant a lot of badly written code that used .acq when they really - * wanted .rel became legacy out in the wild - so when we made a cpu - * that strictly did the .acq or .rel ... 
all that code started breaking - so - * we had to back-pedal and keep the "legacy" behavior of a full fence :-( - */ - -#ifndef __KERNEL__ -/* for compatibility with other platforms: */ -#define cmpxchg(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) -#define cmpxchg64(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) - -#define cmpxchg_local cmpxchg -#define cmpxchg64_local cmpxchg64 -#endif - -#endif /* !__ASSEMBLY__ */ - -#endif /* _UAPI_ASM_IA64_CMPXCHG_H */ diff --git a/arch/ia64/include/uapi/asm/fcntl.h b/arch/ia64/include/uapi/asm/fcntl.h deleted file mode 100644 index 7b95523efe5a..000000000000 --- a/arch/ia64/include/uapi/asm/fcntl.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_FCNTL_H -#define _ASM_IA64_FCNTL_H -/* - * Modified 1998-2000 - * David Mosberger-Tang , Hewlett-Packard Co. - */ - -#define force_o_largefile() \ - (personality(current->personality) != PER_LINUX32) - -#include -#include - -#endif /* _ASM_IA64_FCNTL_H */ diff --git a/arch/ia64/include/uapi/asm/fpu.h b/arch/ia64/include/uapi/asm/fpu.h deleted file mode 100644 index 0df392982ce8..000000000000 --- a/arch/ia64/include/uapi/asm/fpu.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_FPU_H -#define _ASM_IA64_FPU_H - -/* - * Copyright (C) 1998, 1999, 2002, 2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include - -/* floating point status register: */ -#define FPSR_TRAP_VD (1 << 0) /* invalid op trap disabled */ -#define FPSR_TRAP_DD (1 << 1) /* denormal trap disabled */ -#define FPSR_TRAP_ZD (1 << 2) /* zero-divide trap disabled */ -#define FPSR_TRAP_OD (1 << 3) /* overflow trap disabled */ -#define FPSR_TRAP_UD (1 << 4) /* underflow trap disabled */ -#define FPSR_TRAP_ID (1 << 5) /* inexact trap disabled */ -#define FPSR_S0(x) ((x) << 6) -#define FPSR_S1(x) ((x) << 19) -#define FPSR_S2(x) (__IA64_UL(x) << 32) -#define FPSR_S3(x) (__IA64_UL(x) << 45) - -/* 
floating-point status field controls: */ -#define FPSF_FTZ (1 << 0) /* flush-to-zero */ -#define FPSF_WRE (1 << 1) /* widest-range exponent */ -#define FPSF_PC(x) (((x) & 0x3) << 2) /* precision control */ -#define FPSF_RC(x) (((x) & 0x3) << 4) /* rounding control */ -#define FPSF_TD (1 << 6) /* trap disabled */ - -/* floating-point status field flags: */ -#define FPSF_V (1 << 7) /* invalid operation flag */ -#define FPSF_D (1 << 8) /* denormal/unnormal operand flag */ -#define FPSF_Z (1 << 9) /* zero divide (IEEE) flag */ -#define FPSF_O (1 << 10) /* overflow (IEEE) flag */ -#define FPSF_U (1 << 11) /* underflow (IEEE) flag */ -#define FPSF_I (1 << 12) /* inexact (IEEE) flag) */ - -/* floating-point rounding control: */ -#define FPRC_NEAREST 0x0 -#define FPRC_NEGINF 0x1 -#define FPRC_POSINF 0x2 -#define FPRC_TRUNC 0x3 - -#define FPSF_DEFAULT (FPSF_PC (0x3) | FPSF_RC (FPRC_NEAREST)) - -/* This default value is the same as HP-UX uses. Don't change it - without a very good reason. */ -#define FPSR_DEFAULT (FPSR_TRAP_VD | FPSR_TRAP_DD | FPSR_TRAP_ZD \ - | FPSR_TRAP_OD | FPSR_TRAP_UD | FPSR_TRAP_ID \ - | FPSR_S0 (FPSF_DEFAULT) \ - | FPSR_S1 (FPSF_DEFAULT | FPSF_TD | FPSF_WRE) \ - | FPSR_S2 (FPSF_DEFAULT | FPSF_TD) \ - | FPSR_S3 (FPSF_DEFAULT | FPSF_TD)) - -# ifndef __ASSEMBLY__ - -struct ia64_fpreg { - union { - unsigned long bits[2]; - long double __dummy; /* force 16-byte alignment */ - } u; -}; - -# endif /* __ASSEMBLY__ */ - -#endif /* _ASM_IA64_FPU_H */ diff --git a/arch/ia64/include/uapi/asm/gcc_intrin.h b/arch/ia64/include/uapi/asm/gcc_intrin.h deleted file mode 100644 index ecfa3eadb217..000000000000 --- a/arch/ia64/include/uapi/asm/gcc_intrin.h +++ /dev/null @@ -1,619 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * Copyright (C) 2002,2003 Jun Nakajima - * Copyright (C) 2002,2003 Suresh Siddha - */ -#ifndef _UAPI_ASM_IA64_GCC_INTRIN_H -#define _UAPI_ASM_IA64_GCC_INTRIN_H - -#include -#include - -/* define this macro to get 
some asm stmts included in 'c' files */ -#define ASM_SUPPORTED - -/* Optimization barrier */ -/* The "volatile" is due to gcc bugs */ -#define ia64_barrier() asm volatile ("":::"memory") - -#define ia64_stop() asm volatile (";;"::) - -#define ia64_invala_gr(regnum) asm volatile ("invala.e r%0" :: "i"(regnum)) - -#define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum)) - -#define ia64_flushrs() asm volatile ("flushrs;;":::"memory") - -#define ia64_loadrs() asm volatile ("loadrs;;":::"memory") - -extern void ia64_bad_param_for_setreg (void); -extern void ia64_bad_param_for_getreg (void); - - -#define ia64_setreg(regnum, val) \ -({ \ - switch (regnum) { \ - case _IA64_REG_PSR_L: \ - asm volatile ("mov psr.l=%0" :: "r"(val) : "memory"); \ - break; \ - case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC: \ - asm volatile ("mov ar%0=%1" :: \ - "i" (regnum - _IA64_REG_AR_KR0), \ - "r"(val): "memory"); \ - break; \ - case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1: \ - asm volatile ("mov cr%0=%1" :: \ - "i" (regnum - _IA64_REG_CR_DCR), \ - "r"(val): "memory" ); \ - break; \ - case _IA64_REG_SP: \ - asm volatile ("mov r12=%0" :: \ - "r"(val): "memory"); \ - break; \ - case _IA64_REG_GP: \ - asm volatile ("mov gp=%0" :: "r"(val) : "memory"); \ - break; \ - default: \ - ia64_bad_param_for_setreg(); \ - break; \ - } \ -}) - -#define ia64_getreg(regnum) \ -({ \ - __u64 ia64_intri_res; \ - \ - switch (regnum) { \ - case _IA64_REG_GP: \ - asm volatile ("mov %0=gp" : "=r"(ia64_intri_res)); \ - break; \ - case _IA64_REG_IP: \ - asm volatile ("mov %0=ip" : "=r"(ia64_intri_res)); \ - break; \ - case _IA64_REG_PSR: \ - asm volatile ("mov %0=psr" : "=r"(ia64_intri_res)); \ - break; \ - case _IA64_REG_TP: /* for current() */ \ - ia64_intri_res = ia64_r13; \ - break; \ - case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC: \ - asm volatile ("mov %0=ar%1" : "=r" (ia64_intri_res) \ - : "i"(regnum - _IA64_REG_AR_KR0)); \ - break; \ - case _IA64_REG_CR_DCR ... 
_IA64_REG_CR_LRR1: \ - asm volatile ("mov %0=cr%1" : "=r" (ia64_intri_res) \ - : "i" (regnum - _IA64_REG_CR_DCR)); \ - break; \ - case _IA64_REG_SP: \ - asm volatile ("mov %0=sp" : "=r" (ia64_intri_res)); \ - break; \ - default: \ - ia64_bad_param_for_getreg(); \ - break; \ - } \ - ia64_intri_res; \ -}) - -#define ia64_hint_pause 0 - -#define ia64_hint(mode) \ -({ \ - switch (mode) { \ - case ia64_hint_pause: \ - asm volatile ("hint @pause" ::: "memory"); \ - break; \ - } \ -}) - - -/* Integer values for mux1 instruction */ -#define ia64_mux1_brcst 0 -#define ia64_mux1_mix 8 -#define ia64_mux1_shuf 9 -#define ia64_mux1_alt 10 -#define ia64_mux1_rev 11 - -#define ia64_mux1(x, mode) \ -({ \ - __u64 ia64_intri_res; \ - \ - switch (mode) { \ - case ia64_mux1_brcst: \ - asm ("mux1 %0=%1,@brcst" : "=r" (ia64_intri_res) : "r" (x)); \ - break; \ - case ia64_mux1_mix: \ - asm ("mux1 %0=%1,@mix" : "=r" (ia64_intri_res) : "r" (x)); \ - break; \ - case ia64_mux1_shuf: \ - asm ("mux1 %0=%1,@shuf" : "=r" (ia64_intri_res) : "r" (x)); \ - break; \ - case ia64_mux1_alt: \ - asm ("mux1 %0=%1,@alt" : "=r" (ia64_intri_res) : "r" (x)); \ - break; \ - case ia64_mux1_rev: \ - asm ("mux1 %0=%1,@rev" : "=r" (ia64_intri_res) : "r" (x)); \ - break; \ - } \ - ia64_intri_res; \ -}) - -#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) -# define ia64_popcnt(x) __builtin_popcountl(x) -#else -# define ia64_popcnt(x) \ - ({ \ - __u64 ia64_intri_res; \ - asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \ - \ - ia64_intri_res; \ - }) -#endif - -#define ia64_getf_exp(x) \ -({ \ - long ia64_intri_res; \ - \ - asm ("getf.exp %0=%1" : "=r"(ia64_intri_res) : "f"(x)); \ - \ - ia64_intri_res; \ -}) - -#define ia64_shrp(a, b, count) \ -({ \ - __u64 ia64_intri_res; \ - asm ("shrp %0=%1,%2,%3" : "=r"(ia64_intri_res) : "r"(a), "r"(b), "i"(count)); \ - ia64_intri_res; \ -}) - -#define ia64_ldfs(regnum, x) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("ldfs %0=[%1]" 
:"=f"(__f__): "r"(x)); \ -}) - -#define ia64_ldfd(regnum, x) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("ldfd %0=[%1]" :"=f"(__f__): "r"(x)); \ -}) - -#define ia64_ldfe(regnum, x) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("ldfe %0=[%1]" :"=f"(__f__): "r"(x)); \ -}) - -#define ia64_ldf8(regnum, x) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("ldf8 %0=[%1]" :"=f"(__f__): "r"(x)); \ -}) - -#define ia64_ldf_fill(regnum, x) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("ldf.fill %0=[%1]" :"=f"(__f__): "r"(x)); \ -}) - -#define ia64_st4_rel_nta(m, val) \ -({ \ - asm volatile ("st4.rel.nta [%0] = %1\n\t" :: "r"(m), "r"(val)); \ -}) - -#define ia64_stfs(x, regnum) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("stfs [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ -}) - -#define ia64_stfd(x, regnum) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("stfd [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ -}) - -#define ia64_stfe(x, regnum) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("stfe [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ -}) - -#define ia64_stf8(x, regnum) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("stf8 [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ -}) - -#define ia64_stf_spill(x, regnum) \ -({ \ - register double __f__ asm ("f"#regnum); \ - asm volatile ("stf.spill [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ -}) - -#define ia64_fetchadd4_acq(p, inc) \ -({ \ - \ - __u64 ia64_intri_res; \ - asm volatile ("fetchadd4.acq %0=[%1],%2" \ - : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ - : "memory"); \ - \ - ia64_intri_res; \ -}) - -#define ia64_fetchadd4_rel(p, inc) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("fetchadd4.rel %0=[%1],%2" \ - : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ - : "memory"); \ - \ - ia64_intri_res; \ -}) - -#define ia64_fetchadd8_acq(p, inc) \ -({ \ - \ - __u64 
ia64_intri_res; \ - asm volatile ("fetchadd8.acq %0=[%1],%2" \ - : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ - : "memory"); \ - \ - ia64_intri_res; \ -}) - -#define ia64_fetchadd8_rel(p, inc) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("fetchadd8.rel %0=[%1],%2" \ - : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ - : "memory"); \ - \ - ia64_intri_res; \ -}) - -#define ia64_xchg1(ptr,x) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("xchg1 %0=[%1],%2" \ - : "=r" (ia64_intri_res) : "r" (ptr), "r" (x) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_xchg2(ptr,x) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("xchg2 %0=[%1],%2" : "=r" (ia64_intri_res) \ - : "r" (ptr), "r" (x) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_xchg4(ptr,x) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("xchg4 %0=[%1],%2" : "=r" (ia64_intri_res) \ - : "r" (ptr), "r" (x) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_xchg8(ptr,x) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("xchg8 %0=[%1],%2" : "=r" (ia64_intri_res) \ - : "r" (ptr), "r" (x) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_cmpxchg1_acq(ptr, new, old) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ - asm volatile ("cmpxchg1.acq %0=[%1],%2,ar.ccv": \ - "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_cmpxchg1_rel(ptr, new, old) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ - asm volatile ("cmpxchg1.rel %0=[%1],%2,ar.ccv": \ - "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_cmpxchg2_acq(ptr, new, old) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ - asm volatile ("cmpxchg2.acq %0=[%1],%2,ar.ccv": \ - "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_cmpxchg2_rel(ptr, new, old) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("mov 
ar.ccv=%0;;" :: "rO"(old)); \ - \ - asm volatile ("cmpxchg2.rel %0=[%1],%2,ar.ccv": \ - "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_cmpxchg4_acq(ptr, new, old) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ - asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv": \ - "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_cmpxchg4_rel(ptr, new, old) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ - asm volatile ("cmpxchg4.rel %0=[%1],%2,ar.ccv": \ - "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_cmpxchg8_acq(ptr, new, old) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ - asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv": \ - "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_cmpxchg8_rel(ptr, new, old) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ - \ - asm volatile ("cmpxchg8.rel %0=[%1],%2,ar.ccv": \ - "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ - ia64_intri_res; \ -}) - -#define ia64_mf() asm volatile ("mf" ::: "memory") -#define ia64_mfa() asm volatile ("mf.a" ::: "memory") - -#define ia64_invala() asm volatile ("invala" ::: "memory") - -#define ia64_thash(addr) \ -({ \ - unsigned long ia64_intri_res; \ - asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ - ia64_intri_res; \ -}) - -#define ia64_srlz_i() asm volatile (";; srlz.i ;;" ::: "memory") -#define ia64_srlz_d() asm volatile (";; srlz.d" ::: "memory"); - -#ifdef HAVE_SERIALIZE_DIRECTIVE -# define ia64_dv_serialize_data() asm volatile (".serialize.data"); -# define ia64_dv_serialize_instruction() asm volatile (".serialize.instruction"); -#else -# define ia64_dv_serialize_data() -# define ia64_dv_serialize_instruction() -#endif - -#define ia64_nop(x) 
asm volatile ("nop %0"::"i"(x)); - -#define ia64_itci(addr) asm volatile ("itc.i %0;;" :: "r"(addr) : "memory") - -#define ia64_itcd(addr) asm volatile ("itc.d %0;;" :: "r"(addr) : "memory") - - -#define ia64_itri(trnum, addr) asm volatile ("itr.i itr[%0]=%1" \ - :: "r"(trnum), "r"(addr) : "memory") - -#define ia64_itrd(trnum, addr) asm volatile ("itr.d dtr[%0]=%1" \ - :: "r"(trnum), "r"(addr) : "memory") - -#define ia64_tpa(addr) \ -({ \ - unsigned long ia64_pa; \ - asm volatile ("tpa %0 = %1" : "=r"(ia64_pa) : "r"(addr) : "memory"); \ - ia64_pa; \ -}) - -#define __ia64_set_dbr(index, val) \ - asm volatile ("mov dbr[%0]=%1" :: "r"(index), "r"(val) : "memory") - -#define ia64_set_ibr(index, val) \ - asm volatile ("mov ibr[%0]=%1" :: "r"(index), "r"(val) : "memory") - -#define ia64_set_pkr(index, val) \ - asm volatile ("mov pkr[%0]=%1" :: "r"(index), "r"(val) : "memory") - -#define ia64_set_pmc(index, val) \ - asm volatile ("mov pmc[%0]=%1" :: "r"(index), "r"(val) : "memory") - -#define ia64_set_pmd(index, val) \ - asm volatile ("mov pmd[%0]=%1" :: "r"(index), "r"(val) : "memory") - -#define ia64_set_rr(index, val) \ - asm volatile ("mov rr[%0]=%1" :: "r"(index), "r"(val) : "memory"); - -#define ia64_get_cpuid(index) \ -({ \ - unsigned long ia64_intri_res; \ - asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index)); \ - ia64_intri_res; \ -}) - -#define __ia64_get_dbr(index) \ -({ \ - unsigned long ia64_intri_res; \ - asm volatile ("mov %0=dbr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ - ia64_intri_res; \ -}) - -#define ia64_get_ibr(index) \ -({ \ - unsigned long ia64_intri_res; \ - asm volatile ("mov %0=ibr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ - ia64_intri_res; \ -}) - -#define ia64_get_pkr(index) \ -({ \ - unsigned long ia64_intri_res; \ - asm volatile ("mov %0=pkr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ - ia64_intri_res; \ -}) - -#define ia64_get_pmc(index) \ -({ \ - unsigned long ia64_intri_res; \ - asm volatile ("mov %0=pmc[%1]" : 
"=r"(ia64_intri_res) : "r"(index)); \ - ia64_intri_res; \ -}) - - -#define ia64_get_pmd(index) \ -({ \ - unsigned long ia64_intri_res; \ - asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ - ia64_intri_res; \ -}) - -#define ia64_get_rr(index) \ -({ \ - unsigned long ia64_intri_res; \ - asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index)); \ - ia64_intri_res; \ -}) - -#define ia64_fc(addr) asm volatile ("fc %0" :: "r"(addr) : "memory") - - -#define ia64_sync_i() asm volatile (";; sync.i" ::: "memory") - -#define ia64_ssm(mask) asm volatile ("ssm %0":: "i"((mask)) : "memory") -#define ia64_rsm(mask) asm volatile ("rsm %0":: "i"((mask)) : "memory") -#define ia64_sum(mask) asm volatile ("sum %0":: "i"((mask)) : "memory") -#define ia64_rum(mask) asm volatile ("rum %0":: "i"((mask)) : "memory") - -#define ia64_ptce(addr) asm volatile ("ptc.e %0" :: "r"(addr)) - -#define ia64_ptcga(addr, size) \ -do { \ - asm volatile ("ptc.ga %0,%1" :: "r"(addr), "r"(size) : "memory"); \ - ia64_dv_serialize_data(); \ -} while (0) - -#define ia64_ptcl(addr, size) \ -do { \ - asm volatile ("ptc.l %0,%1" :: "r"(addr), "r"(size) : "memory"); \ - ia64_dv_serialize_data(); \ -} while (0) - -#define ia64_ptri(addr, size) \ - asm volatile ("ptr.i %0,%1" :: "r"(addr), "r"(size) : "memory") - -#define ia64_ptrd(addr, size) \ - asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory") - -#define ia64_ttag(addr) \ -({ \ - __u64 ia64_intri_res; \ - asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ - ia64_intri_res; \ -}) - - -/* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */ - -#define ia64_lfhint_none 0 -#define ia64_lfhint_nt1 1 -#define ia64_lfhint_nt2 2 -#define ia64_lfhint_nta 3 - -#define ia64_lfetch(lfhint, y) \ -({ \ - switch (lfhint) { \ - case ia64_lfhint_none: \ - asm volatile ("lfetch [%0]" : : "r"(y)); \ - break; \ - case ia64_lfhint_nt1: \ - asm volatile ("lfetch.nt1 [%0]" : : "r"(y)); \ - break; \ - case 
ia64_lfhint_nt2: \ - asm volatile ("lfetch.nt2 [%0]" : : "r"(y)); \ - break; \ - case ia64_lfhint_nta: \ - asm volatile ("lfetch.nta [%0]" : : "r"(y)); \ - break; \ - } \ -}) - -#define ia64_lfetch_excl(lfhint, y) \ -({ \ - switch (lfhint) { \ - case ia64_lfhint_none: \ - asm volatile ("lfetch.excl [%0]" :: "r"(y)); \ - break; \ - case ia64_lfhint_nt1: \ - asm volatile ("lfetch.excl.nt1 [%0]" :: "r"(y)); \ - break; \ - case ia64_lfhint_nt2: \ - asm volatile ("lfetch.excl.nt2 [%0]" :: "r"(y)); \ - break; \ - case ia64_lfhint_nta: \ - asm volatile ("lfetch.excl.nta [%0]" :: "r"(y)); \ - break; \ - } \ -}) - -#define ia64_lfetch_fault(lfhint, y) \ -({ \ - switch (lfhint) { \ - case ia64_lfhint_none: \ - asm volatile ("lfetch.fault [%0]" : : "r"(y)); \ - break; \ - case ia64_lfhint_nt1: \ - asm volatile ("lfetch.fault.nt1 [%0]" : : "r"(y)); \ - break; \ - case ia64_lfhint_nt2: \ - asm volatile ("lfetch.fault.nt2 [%0]" : : "r"(y)); \ - break; \ - case ia64_lfhint_nta: \ - asm volatile ("lfetch.fault.nta [%0]" : : "r"(y)); \ - break; \ - } \ -}) - -#define ia64_lfetch_fault_excl(lfhint, y) \ -({ \ - switch (lfhint) { \ - case ia64_lfhint_none: \ - asm volatile ("lfetch.fault.excl [%0]" :: "r"(y)); \ - break; \ - case ia64_lfhint_nt1: \ - asm volatile ("lfetch.fault.excl.nt1 [%0]" :: "r"(y)); \ - break; \ - case ia64_lfhint_nt2: \ - asm volatile ("lfetch.fault.excl.nt2 [%0]" :: "r"(y)); \ - break; \ - case ia64_lfhint_nta: \ - asm volatile ("lfetch.fault.excl.nta [%0]" :: "r"(y)); \ - break; \ - } \ -}) - -#define ia64_intrin_local_irq_restore(x) \ -do { \ - asm volatile (";; cmp.ne p6,p7=%0,r0;;" \ - "(p6) ssm psr.i;" \ - "(p7) rsm psr.i;;" \ - "(p6) srlz.d" \ - :: "r"((x)) : "p6", "p7", "memory"); \ -} while (0) - -#endif /* _UAPI_ASM_IA64_GCC_INTRIN_H */ diff --git a/arch/ia64/include/uapi/asm/ia64regs.h b/arch/ia64/include/uapi/asm/ia64regs.h deleted file mode 100644 index d7d10cec8b9f..000000000000 --- a/arch/ia64/include/uapi/asm/ia64regs.h +++ /dev/null @@ -1,101 
+0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2002,2003 Intel Corp. - * Jun Nakajima - * Suresh Siddha - */ - -#ifndef _ASM_IA64_IA64REGS_H -#define _ASM_IA64_IA64REGS_H - -/* - * Register Names for getreg() and setreg(). - * - * The "magic" numbers happen to match the values used by the Intel compiler's - * getreg()/setreg() intrinsics. - */ - -/* Special Registers */ - -#define _IA64_REG_IP 1016 /* getreg only */ -#define _IA64_REG_PSR 1019 -#define _IA64_REG_PSR_L 1019 - -/* General Integer Registers */ - -#define _IA64_REG_GP 1025 /* R1 */ -#define _IA64_REG_R8 1032 /* R8 */ -#define _IA64_REG_R9 1033 /* R9 */ -#define _IA64_REG_SP 1036 /* R12 */ -#define _IA64_REG_TP 1037 /* R13 */ - -/* Application Registers */ - -#define _IA64_REG_AR_KR0 3072 -#define _IA64_REG_AR_KR1 3073 -#define _IA64_REG_AR_KR2 3074 -#define _IA64_REG_AR_KR3 3075 -#define _IA64_REG_AR_KR4 3076 -#define _IA64_REG_AR_KR5 3077 -#define _IA64_REG_AR_KR6 3078 -#define _IA64_REG_AR_KR7 3079 -#define _IA64_REG_AR_RSC 3088 -#define _IA64_REG_AR_BSP 3089 -#define _IA64_REG_AR_BSPSTORE 3090 -#define _IA64_REG_AR_RNAT 3091 -#define _IA64_REG_AR_FCR 3093 -#define _IA64_REG_AR_EFLAG 3096 -#define _IA64_REG_AR_CSD 3097 -#define _IA64_REG_AR_SSD 3098 -#define _IA64_REG_AR_CFLAG 3099 -#define _IA64_REG_AR_FSR 3100 -#define _IA64_REG_AR_FIR 3101 -#define _IA64_REG_AR_FDR 3102 -#define _IA64_REG_AR_CCV 3104 -#define _IA64_REG_AR_UNAT 3108 -#define _IA64_REG_AR_FPSR 3112 -#define _IA64_REG_AR_ITC 3116 -#define _IA64_REG_AR_PFS 3136 -#define _IA64_REG_AR_LC 3137 -#define _IA64_REG_AR_EC 3138 - -/* Control Registers */ - -#define _IA64_REG_CR_DCR 4096 -#define _IA64_REG_CR_ITM 4097 -#define _IA64_REG_CR_IVA 4098 -#define _IA64_REG_CR_PTA 4104 -#define _IA64_REG_CR_IPSR 4112 -#define _IA64_REG_CR_ISR 4113 -#define _IA64_REG_CR_IIP 4115 -#define _IA64_REG_CR_IFA 4116 -#define _IA64_REG_CR_ITIR 4117 -#define _IA64_REG_CR_IIPA 4118 -#define _IA64_REG_CR_IFS 4119 
-#define _IA64_REG_CR_IIM 4120 -#define _IA64_REG_CR_IHA 4121 -#define _IA64_REG_CR_LID 4160 -#define _IA64_REG_CR_IVR 4161 /* getreg only */ -#define _IA64_REG_CR_TPR 4162 -#define _IA64_REG_CR_EOI 4163 -#define _IA64_REG_CR_IRR0 4164 /* getreg only */ -#define _IA64_REG_CR_IRR1 4165 /* getreg only */ -#define _IA64_REG_CR_IRR2 4166 /* getreg only */ -#define _IA64_REG_CR_IRR3 4167 /* getreg only */ -#define _IA64_REG_CR_ITV 4168 -#define _IA64_REG_CR_PMV 4169 -#define _IA64_REG_CR_CMCV 4170 -#define _IA64_REG_CR_LRR0 4176 -#define _IA64_REG_CR_LRR1 4177 - -/* Indirect Registers for getindreg() and setindreg() */ - -#define _IA64_REG_INDR_CPUID 9000 /* getindreg only */ -#define _IA64_REG_INDR_DBR 9001 -#define _IA64_REG_INDR_IBR 9002 -#define _IA64_REG_INDR_PKR 9003 -#define _IA64_REG_INDR_PMC 9004 -#define _IA64_REG_INDR_PMD 9005 -#define _IA64_REG_INDR_RR 9006 - -#endif /* _ASM_IA64_IA64REGS_H */ diff --git a/arch/ia64/include/uapi/asm/intrinsics.h b/arch/ia64/include/uapi/asm/intrinsics.h deleted file mode 100644 index 63f27c4ec739..000000000000 --- a/arch/ia64/include/uapi/asm/intrinsics.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Compiler-dependent intrinsics. - * - * Copyright (C) 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ -#ifndef _UAPI_ASM_IA64_INTRINSICS_H -#define _UAPI_ASM_IA64_INTRINSICS_H - - -#ifndef __ASSEMBLY__ - -#include -/* include compiler specific intrinsics */ -#include -#include -#include - -#define ia64_set_rr0_to_rr4(val0, val1, val2, val3, val4) \ -do { \ - ia64_set_rr(0x0000000000000000UL, (val0)); \ - ia64_set_rr(0x2000000000000000UL, (val1)); \ - ia64_set_rr(0x4000000000000000UL, (val2)); \ - ia64_set_rr(0x6000000000000000UL, (val3)); \ - ia64_set_rr(0x8000000000000000UL, (val4)); \ -} while (0) - -/* - * Force an unresolved reference if someone tries to use - * ia64_fetch_and_add() with a bad value. 
- */ -extern unsigned long __bad_size_for_ia64_fetch_and_add (void); -extern unsigned long __bad_increment_for_ia64_fetch_and_add (void); - -#define IA64_FETCHADD(tmp,v,n,sz,sem) \ -({ \ - switch (sz) { \ - case 4: \ - tmp = ia64_fetchadd4_##sem((unsigned int *) v, n); \ - break; \ - \ - case 8: \ - tmp = ia64_fetchadd8_##sem((unsigned long *) v, n); \ - break; \ - \ - default: \ - __bad_size_for_ia64_fetch_and_add(); \ - } \ -}) - -#define ia64_fetchadd(i,v,sem) \ -({ \ - __u64 _tmp; \ - volatile __typeof__(*(v)) *_v = (v); \ - /* Can't use a switch () here: gcc isn't always smart enough for that... */ \ - if ((i) == -16) \ - IA64_FETCHADD(_tmp, _v, -16, sizeof(*(v)), sem); \ - else if ((i) == -8) \ - IA64_FETCHADD(_tmp, _v, -8, sizeof(*(v)), sem); \ - else if ((i) == -4) \ - IA64_FETCHADD(_tmp, _v, -4, sizeof(*(v)), sem); \ - else if ((i) == -1) \ - IA64_FETCHADD(_tmp, _v, -1, sizeof(*(v)), sem); \ - else if ((i) == 1) \ - IA64_FETCHADD(_tmp, _v, 1, sizeof(*(v)), sem); \ - else if ((i) == 4) \ - IA64_FETCHADD(_tmp, _v, 4, sizeof(*(v)), sem); \ - else if ((i) == 8) \ - IA64_FETCHADD(_tmp, _v, 8, sizeof(*(v)), sem); \ - else if ((i) == 16) \ - IA64_FETCHADD(_tmp, _v, 16, sizeof(*(v)), sem); \ - else \ - _tmp = __bad_increment_for_ia64_fetch_and_add(); \ - (__typeof__(*(v))) (_tmp); /* return old value */ \ -}) - -#define ia64_fetch_and_add(i,v) (ia64_fetchadd(i, v, rel) + (i)) /* return new value */ - -#endif - -#endif /* _UAPI_ASM_IA64_INTRINSICS_H */ diff --git a/arch/ia64/include/uapi/asm/mman.h b/arch/ia64/include/uapi/asm/mman.h deleted file mode 100644 index ce0cc3d7509e..000000000000 --- a/arch/ia64/include/uapi/asm/mman.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Based on . 
- * - * Modified 1998-2000, 2002 - * David Mosberger-Tang , Hewlett-Packard Co - */ -#ifndef _UAPI_ASM_IA64_MMAN_H -#define _UAPI_ASM_IA64_MMAN_H - - -#include - -#define MAP_GROWSUP 0x0200 /* register stack-like segment */ - - -#endif /* _UAPI_ASM_IA64_MMAN_H */ diff --git a/arch/ia64/include/uapi/asm/param.h b/arch/ia64/include/uapi/asm/param.h deleted file mode 100644 index 123ab45940b4..000000000000 --- a/arch/ia64/include/uapi/asm/param.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Fundamental kernel parameters. - * - * Based on . - * - * Modified 1998, 1999, 2002-2003 - * David Mosberger-Tang , Hewlett-Packard Co - */ -#ifndef _UAPI_ASM_IA64_PARAM_H -#define _UAPI_ASM_IA64_PARAM_H - - -#define EXEC_PAGESIZE 65536 - -#ifndef NOGROUP -# define NOGROUP (-1) -#endif - -#define MAXHOSTNAMELEN 64 /* max length of hostname */ - -#ifndef __KERNEL__ - /* - * Technically, this is wrong, but some old apps still refer to it. The proper way to - * get the HZ value is via sysconf(_SC_CLK_TCK). 
- */ -# define HZ 1024 -#endif - -#endif /* _UAPI_ASM_IA64_PARAM_H */ diff --git a/arch/ia64/include/uapi/asm/posix_types.h b/arch/ia64/include/uapi/asm/posix_types.h deleted file mode 100644 index bded40f7defe..000000000000 --- a/arch/ia64/include/uapi/asm/posix_types.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_POSIX_TYPES_H -#define _ASM_IA64_POSIX_TYPES_H - -typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ - -#include - -#endif /* _ASM_IA64_POSIX_TYPES_H */ diff --git a/arch/ia64/include/uapi/asm/ptrace.h b/arch/ia64/include/uapi/asm/ptrace.h deleted file mode 100644 index f52655b44414..000000000000 --- a/arch/ia64/include/uapi/asm/ptrace.h +++ /dev/null @@ -1,248 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 1998-2004 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 2003 Intel Co - * Suresh Siddha - * Fenghua Yu - * Arun Sharma - * - * 12/07/98 S. Eranian added pt_regs & switch_stack - * 12/21/98 D. Mosberger updated to match latest code - * 6/17/99 D. 
Mosberger added second unat member to "struct switch_stack" - * - */ -#ifndef _UAPI_ASM_IA64_PTRACE_H -#define _UAPI_ASM_IA64_PTRACE_H - -/* - * When a user process is blocked, its state looks as follows: - * - * +----------------------+ ------- IA64_STK_OFFSET - * | | ^ - * | struct pt_regs | | - * | | | - * +----------------------+ | - * | | | - * | memory stack | | - * | (growing downwards) | | - * //.....................// | - * | - * //.....................// | - * | | | - * +----------------------+ | - * | struct switch_stack | | - * | | | - * +----------------------+ | - * | | | - * //.....................// | - * | - * //.....................// | - * | | | - * | register stack | | - * | (growing upwards) | | - * | | | - * +----------------------+ | --- IA64_RBS_OFFSET - * | struct thread_info | | ^ - * +----------------------+ | | - * | | | | - * | struct task_struct | | | - * current -> | | | | - * +----------------------+ ------- - * - * Note that ar.ec is not saved explicitly in pt_reg or switch_stack. - * This is because ar.ec is saved as part of ar.pfs. - */ - - -#include - - -#ifndef __ASSEMBLY__ - -/* - * This struct defines the way the registers are saved on system - * calls. - * - * We don't save all floating point register because the kernel - * is compiled to use only a very small subset, so the other are - * untouched. 
- * - * THIS STRUCTURE MUST BE A MULTIPLE 16-BYTE IN SIZE - * (because the memory stack pointer MUST ALWAYS be aligned this way) - * - */ -struct pt_regs { - /* The following registers are saved by SAVE_MIN: */ - unsigned long b6; /* scratch */ - unsigned long b7; /* scratch */ - - unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ - unsigned long ar_ssd; /* reserved for future use (scratch) */ - - unsigned long r8; /* scratch (return value register 0) */ - unsigned long r9; /* scratch (return value register 1) */ - unsigned long r10; /* scratch (return value register 2) */ - unsigned long r11; /* scratch (return value register 3) */ - - unsigned long cr_ipsr; /* interrupted task's psr */ - unsigned long cr_iip; /* interrupted task's instruction pointer */ - /* - * interrupted task's function state; if bit 63 is cleared, it - * contains syscall's ar.pfs.pfm: - */ - unsigned long cr_ifs; - - unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ - unsigned long ar_pfs; /* prev function state */ - unsigned long ar_rsc; /* RSE configuration */ - /* The following two are valid only if cr_ipsr.cpl > 0 || ti->flags & _TIF_MCA_INIT */ - unsigned long ar_rnat; /* RSE NaT */ - unsigned long ar_bspstore; /* RSE bspstore */ - - unsigned long pr; /* 64 predicate registers (1 bit each) */ - unsigned long b0; /* return pointer (bp) */ - unsigned long loadrs; /* size of dirty partition << 16 */ - - unsigned long r1; /* the gp pointer */ - unsigned long r12; /* interrupted task's memory stack pointer */ - unsigned long r13; /* thread pointer */ - - unsigned long ar_fpsr; /* floating point status (preserved) */ - unsigned long r15; /* scratch */ - - /* The remaining registers are NOT saved for system calls. 
*/ - - unsigned long r14; /* scratch */ - unsigned long r2; /* scratch */ - unsigned long r3; /* scratch */ - - /* The following registers are saved by SAVE_REST: */ - unsigned long r16; /* scratch */ - unsigned long r17; /* scratch */ - unsigned long r18; /* scratch */ - unsigned long r19; /* scratch */ - unsigned long r20; /* scratch */ - unsigned long r21; /* scratch */ - unsigned long r22; /* scratch */ - unsigned long r23; /* scratch */ - unsigned long r24; /* scratch */ - unsigned long r25; /* scratch */ - unsigned long r26; /* scratch */ - unsigned long r27; /* scratch */ - unsigned long r28; /* scratch */ - unsigned long r29; /* scratch */ - unsigned long r30; /* scratch */ - unsigned long r31; /* scratch */ - - unsigned long ar_ccv; /* compare/exchange value (scratch) */ - - /* - * Floating point registers that the kernel considers scratch: - */ - struct ia64_fpreg f6; /* scratch */ - struct ia64_fpreg f7; /* scratch */ - struct ia64_fpreg f8; /* scratch */ - struct ia64_fpreg f9; /* scratch */ - struct ia64_fpreg f10; /* scratch */ - struct ia64_fpreg f11; /* scratch */ -}; - -/* - * This structure contains the addition registers that need to - * preserved across a context switch. This generally consists of - * "preserved" registers. 
- */ -struct switch_stack { - unsigned long caller_unat; /* user NaT collection register (preserved) */ - unsigned long ar_fpsr; /* floating-point status register */ - - struct ia64_fpreg f2; /* preserved */ - struct ia64_fpreg f3; /* preserved */ - struct ia64_fpreg f4; /* preserved */ - struct ia64_fpreg f5; /* preserved */ - - struct ia64_fpreg f12; /* scratch, but untouched by kernel */ - struct ia64_fpreg f13; /* scratch, but untouched by kernel */ - struct ia64_fpreg f14; /* scratch, but untouched by kernel */ - struct ia64_fpreg f15; /* scratch, but untouched by kernel */ - struct ia64_fpreg f16; /* preserved */ - struct ia64_fpreg f17; /* preserved */ - struct ia64_fpreg f18; /* preserved */ - struct ia64_fpreg f19; /* preserved */ - struct ia64_fpreg f20; /* preserved */ - struct ia64_fpreg f21; /* preserved */ - struct ia64_fpreg f22; /* preserved */ - struct ia64_fpreg f23; /* preserved */ - struct ia64_fpreg f24; /* preserved */ - struct ia64_fpreg f25; /* preserved */ - struct ia64_fpreg f26; /* preserved */ - struct ia64_fpreg f27; /* preserved */ - struct ia64_fpreg f28; /* preserved */ - struct ia64_fpreg f29; /* preserved */ - struct ia64_fpreg f30; /* preserved */ - struct ia64_fpreg f31; /* preserved */ - - unsigned long r4; /* preserved */ - unsigned long r5; /* preserved */ - unsigned long r6; /* preserved */ - unsigned long r7; /* preserved */ - - unsigned long b0; /* so we can force a direct return in copy_thread */ - unsigned long b1; - unsigned long b2; - unsigned long b3; - unsigned long b4; - unsigned long b5; - - unsigned long ar_pfs; /* previous function state */ - unsigned long ar_lc; /* loop counter (preserved) */ - unsigned long ar_unat; /* NaT bits for r4-r7 */ - unsigned long ar_rnat; /* RSE NaT collection register */ - unsigned long ar_bspstore; /* RSE dirty base (preserved) */ - unsigned long pr; /* 64 predicate registers (1 bit each) */ -}; - - -/* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */ -struct 
pt_all_user_regs { - unsigned long nat; - unsigned long cr_iip; - unsigned long cfm; - unsigned long cr_ipsr; - unsigned long pr; - - unsigned long gr[32]; - unsigned long br[8]; - unsigned long ar[128]; - struct ia64_fpreg fr[128]; -}; - -#endif /* !__ASSEMBLY__ */ - -/* indices to application-registers array in pt_all_user_regs */ -#define PT_AUR_RSC 16 -#define PT_AUR_BSP 17 -#define PT_AUR_BSPSTORE 18 -#define PT_AUR_RNAT 19 -#define PT_AUR_CCV 32 -#define PT_AUR_UNAT 36 -#define PT_AUR_FPSR 40 -#define PT_AUR_PFS 64 -#define PT_AUR_LC 65 -#define PT_AUR_EC 66 - -/* - * The numbers chosen here are somewhat arbitrary but absolutely MUST - * not overlap with any of the number assigned in . - */ -#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ -#define PTRACE_OLD_GETSIGINFO 13 /* (replaced by PTRACE_GETSIGINFO in ) */ -#define PTRACE_OLD_SETSIGINFO 14 /* (replaced by PTRACE_SETSIGINFO in ) */ -#define PTRACE_GETREGS 18 /* get all registers (pt_all_user_regs) in one shot */ -#define PTRACE_SETREGS 19 /* set all registers (pt_all_user_regs) in one shot */ - -#define PTRACE_OLDSETOPTIONS 21 - -#endif /* _UAPI_ASM_IA64_PTRACE_H */ diff --git a/arch/ia64/include/uapi/asm/ptrace_offsets.h b/arch/ia64/include/uapi/asm/ptrace_offsets.h deleted file mode 100644 index 2847c18139ef..000000000000 --- a/arch/ia64/include/uapi/asm/ptrace_offsets.h +++ /dev/null @@ -1,269 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_PTRACE_OFFSETS_H -#define _ASM_IA64_PTRACE_OFFSETS_H - -/* - * Copyright (C) 1999, 2003 Hewlett-Packard Co - * David Mosberger-Tang - */ -/* - * The "uarea" that can be accessed via PEEKUSER and POKEUSER is a - * virtual structure that would have the following definition: - * - * struct uarea { - * struct ia64_fpreg fph[96]; // f32-f127 - * unsigned long nat_bits; - * unsigned long empty1; - * struct ia64_fpreg f2; // f2-f5 - * : - * struct ia64_fpreg f5; - * struct ia64_fpreg f10; // f10-f31 - * : 
- * struct ia64_fpreg f31; - * unsigned long r4; // r4-r7 - * : - * unsigned long r7; - * unsigned long b1; // b1-b5 - * : - * unsigned long b5; - * unsigned long ar_ec; - * unsigned long ar_lc; - * unsigned long empty2[5]; - * unsigned long cr_ipsr; - * unsigned long cr_iip; - * unsigned long cfm; - * unsigned long ar_unat; - * unsigned long ar_pfs; - * unsigned long ar_rsc; - * unsigned long ar_rnat; - * unsigned long ar_bspstore; - * unsigned long pr; - * unsigned long b6; - * unsigned long ar_bsp; - * unsigned long r1; - * unsigned long r2; - * unsigned long r3; - * unsigned long r12; - * unsigned long r13; - * unsigned long r14; - * unsigned long r15; - * unsigned long r8; - * unsigned long r9; - * unsigned long r10; - * unsigned long r11; - * unsigned long r16; - * : - * unsigned long r31; - * unsigned long ar_ccv; - * unsigned long ar_fpsr; - * unsigned long b0; - * unsigned long b7; - * unsigned long f6; - * unsigned long f7; - * unsigned long f8; - * unsigned long f9; - * unsigned long ar_csd; - * unsigned long ar_ssd; - * unsigned long rsvd1[710]; - * unsigned long dbr[8]; - * unsigned long rsvd2[504]; - * unsigned long ibr[8]; - * unsigned long rsvd3[504]; - * unsigned long pmd[4]; - * } - */ - -/* fph: */ -#define PT_F32 0x0000 -#define PT_F33 0x0010 -#define PT_F34 0x0020 -#define PT_F35 0x0030 -#define PT_F36 0x0040 -#define PT_F37 0x0050 -#define PT_F38 0x0060 -#define PT_F39 0x0070 -#define PT_F40 0x0080 -#define PT_F41 0x0090 -#define PT_F42 0x00a0 -#define PT_F43 0x00b0 -#define PT_F44 0x00c0 -#define PT_F45 0x00d0 -#define PT_F46 0x00e0 -#define PT_F47 0x00f0 -#define PT_F48 0x0100 -#define PT_F49 0x0110 -#define PT_F50 0x0120 -#define PT_F51 0x0130 -#define PT_F52 0x0140 -#define PT_F53 0x0150 -#define PT_F54 0x0160 -#define PT_F55 0x0170 -#define PT_F56 0x0180 -#define PT_F57 0x0190 -#define PT_F58 0x01a0 -#define PT_F59 0x01b0 -#define PT_F60 0x01c0 -#define PT_F61 0x01d0 -#define PT_F62 0x01e0 -#define PT_F63 0x01f0 -#define PT_F64 0x0200 
-#define PT_F65 0x0210 -#define PT_F66 0x0220 -#define PT_F67 0x0230 -#define PT_F68 0x0240 -#define PT_F69 0x0250 -#define PT_F70 0x0260 -#define PT_F71 0x0270 -#define PT_F72 0x0280 -#define PT_F73 0x0290 -#define PT_F74 0x02a0 -#define PT_F75 0x02b0 -#define PT_F76 0x02c0 -#define PT_F77 0x02d0 -#define PT_F78 0x02e0 -#define PT_F79 0x02f0 -#define PT_F80 0x0300 -#define PT_F81 0x0310 -#define PT_F82 0x0320 -#define PT_F83 0x0330 -#define PT_F84 0x0340 -#define PT_F85 0x0350 -#define PT_F86 0x0360 -#define PT_F87 0x0370 -#define PT_F88 0x0380 -#define PT_F89 0x0390 -#define PT_F90 0x03a0 -#define PT_F91 0x03b0 -#define PT_F92 0x03c0 -#define PT_F93 0x03d0 -#define PT_F94 0x03e0 -#define PT_F95 0x03f0 -#define PT_F96 0x0400 -#define PT_F97 0x0410 -#define PT_F98 0x0420 -#define PT_F99 0x0430 -#define PT_F100 0x0440 -#define PT_F101 0x0450 -#define PT_F102 0x0460 -#define PT_F103 0x0470 -#define PT_F104 0x0480 -#define PT_F105 0x0490 -#define PT_F106 0x04a0 -#define PT_F107 0x04b0 -#define PT_F108 0x04c0 -#define PT_F109 0x04d0 -#define PT_F110 0x04e0 -#define PT_F111 0x04f0 -#define PT_F112 0x0500 -#define PT_F113 0x0510 -#define PT_F114 0x0520 -#define PT_F115 0x0530 -#define PT_F116 0x0540 -#define PT_F117 0x0550 -#define PT_F118 0x0560 -#define PT_F119 0x0570 -#define PT_F120 0x0580 -#define PT_F121 0x0590 -#define PT_F122 0x05a0 -#define PT_F123 0x05b0 -#define PT_F124 0x05c0 -#define PT_F125 0x05d0 -#define PT_F126 0x05e0 -#define PT_F127 0x05f0 - -#define PT_NAT_BITS 0x0600 - -#define PT_F2 0x0610 -#define PT_F3 0x0620 -#define PT_F4 0x0630 -#define PT_F5 0x0640 -#define PT_F10 0x0650 -#define PT_F11 0x0660 -#define PT_F12 0x0670 -#define PT_F13 0x0680 -#define PT_F14 0x0690 -#define PT_F15 0x06a0 -#define PT_F16 0x06b0 -#define PT_F17 0x06c0 -#define PT_F18 0x06d0 -#define PT_F19 0x06e0 -#define PT_F20 0x06f0 -#define PT_F21 0x0700 -#define PT_F22 0x0710 -#define PT_F23 0x0720 -#define PT_F24 0x0730 -#define PT_F25 0x0740 -#define PT_F26 0x0750 -#define 
PT_F27 0x0760 -#define PT_F28 0x0770 -#define PT_F29 0x0780 -#define PT_F30 0x0790 -#define PT_F31 0x07a0 -#define PT_R4 0x07b0 -#define PT_R5 0x07b8 -#define PT_R6 0x07c0 -#define PT_R7 0x07c8 - -#define PT_B1 0x07d8 -#define PT_B2 0x07e0 -#define PT_B3 0x07e8 -#define PT_B4 0x07f0 -#define PT_B5 0x07f8 - -#define PT_AR_EC 0x0800 -#define PT_AR_LC 0x0808 - -#define PT_CR_IPSR 0x0830 -#define PT_CR_IIP 0x0838 -#define PT_CFM 0x0840 -#define PT_AR_UNAT 0x0848 -#define PT_AR_PFS 0x0850 -#define PT_AR_RSC 0x0858 -#define PT_AR_RNAT 0x0860 -#define PT_AR_BSPSTORE 0x0868 -#define PT_PR 0x0870 -#define PT_B6 0x0878 -#define PT_AR_BSP 0x0880 /* note: this points to the *end* of the backing store! */ -#define PT_R1 0x0888 -#define PT_R2 0x0890 -#define PT_R3 0x0898 -#define PT_R12 0x08a0 -#define PT_R13 0x08a8 -#define PT_R14 0x08b0 -#define PT_R15 0x08b8 -#define PT_R8 0x08c0 -#define PT_R9 0x08c8 -#define PT_R10 0x08d0 -#define PT_R11 0x08d8 -#define PT_R16 0x08e0 -#define PT_R17 0x08e8 -#define PT_R18 0x08f0 -#define PT_R19 0x08f8 -#define PT_R20 0x0900 -#define PT_R21 0x0908 -#define PT_R22 0x0910 -#define PT_R23 0x0918 -#define PT_R24 0x0920 -#define PT_R25 0x0928 -#define PT_R26 0x0930 -#define PT_R27 0x0938 -#define PT_R28 0x0940 -#define PT_R29 0x0948 -#define PT_R30 0x0950 -#define PT_R31 0x0958 -#define PT_AR_CCV 0x0960 -#define PT_AR_FPSR 0x0968 -#define PT_B0 0x0970 -#define PT_B7 0x0978 -#define PT_F6 0x0980 -#define PT_F7 0x0990 -#define PT_F8 0x09a0 -#define PT_F9 0x09b0 -#define PT_AR_CSD 0x09c0 -#define PT_AR_SSD 0x09c8 - -#define PT_DBR 0x2000 /* data breakpoint registers */ -#define PT_IBR 0x3000 /* instruction breakpoint registers */ -#define PT_PMD 0x4000 /* performance monitoring counters */ - -#endif /* _ASM_IA64_PTRACE_OFFSETS_H */ diff --git a/arch/ia64/include/uapi/asm/resource.h b/arch/ia64/include/uapi/asm/resource.h deleted file mode 100644 index d488d2b22ac4..000000000000 --- a/arch/ia64/include/uapi/asm/resource.h +++ /dev/null @@ -1,8 +0,0 
@@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_RESOURCE_H -#define _ASM_IA64_RESOURCE_H - -#include -#include - -#endif /* _ASM_IA64_RESOURCE_H */ diff --git a/arch/ia64/include/uapi/asm/rse.h b/arch/ia64/include/uapi/asm/rse.h deleted file mode 100644 index 6d260af571c5..000000000000 --- a/arch/ia64/include/uapi/asm/rse.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_RSE_H -#define _ASM_IA64_RSE_H - -/* - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David Mosberger-Tang - * - * Register stack engine related helper functions. This file may be - * used in applications, so be careful about the name-space and give - * some consideration to non-GNU C compilers (though __inline__ is - * fine). - */ - -static __inline__ unsigned long -ia64_rse_slot_num (unsigned long *addr) -{ - return (((unsigned long) addr) >> 3) & 0x3f; -} - -/* - * Return TRUE if ADDR is the address of an RNAT slot. - */ -static __inline__ unsigned long -ia64_rse_is_rnat_slot (unsigned long *addr) -{ - return ia64_rse_slot_num(addr) == 0x3f; -} - -/* - * Returns the address of the RNAT slot that covers the slot at - * address SLOT_ADDR. - */ -static __inline__ unsigned long * -ia64_rse_rnat_addr (unsigned long *slot_addr) -{ - return (unsigned long *) ((unsigned long) slot_addr | (0x3f << 3)); -} - -/* - * Calculate the number of registers in the dirty partition starting at BSPSTORE and - * ending at BSP. This isn't simply (BSP-BSPSTORE)/8 because every 64th slot stores - * ar.rnat. - */ -static __inline__ unsigned long -ia64_rse_num_regs (unsigned long *bspstore, unsigned long *bsp) -{ - unsigned long slots = (bsp - bspstore); - - return slots - (ia64_rse_slot_num(bspstore) + slots)/0x40; -} - -/* - * The inverse of the above: given bspstore and the number of - * registers, calculate ar.bsp. 
- */ -static __inline__ unsigned long * -ia64_rse_skip_regs (unsigned long *addr, long num_regs) -{ - long delta = ia64_rse_slot_num(addr) + num_regs; - - if (num_regs < 0) - delta -= 0x3e; - return addr + num_regs + delta/0x3f; -} - -#endif /* _ASM_IA64_RSE_H */ diff --git a/arch/ia64/include/uapi/asm/setup.h b/arch/ia64/include/uapi/asm/setup.h deleted file mode 100644 index 8d13ce8fb03a..000000000000 --- a/arch/ia64/include/uapi/asm/setup.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __IA64_SETUP_H -#define __IA64_SETUP_H - -#define COMMAND_LINE_SIZE 2048 - -extern struct ia64_boot_param { - __u64 command_line; /* physical address of command line arguments */ - __u64 efi_systab; /* physical address of EFI system table */ - __u64 efi_memmap; /* physical address of EFI memory map */ - __u64 efi_memmap_size; /* size of EFI memory map */ - __u64 efi_memdesc_size; /* size of an EFI memory map descriptor */ - __u32 efi_memdesc_version; /* memory descriptor version */ - struct { - __u16 num_cols; /* number of columns on console output device */ - __u16 num_rows; /* number of rows on console output device */ - __u16 orig_x; /* cursor's x position */ - __u16 orig_y; /* cursor's y position */ - } console_info; - __u64 fpswa; /* physical address of the fpswa interface */ - __u64 initrd_start; - __u64 initrd_size; -} *ia64_boot_param; - -#endif diff --git a/arch/ia64/include/uapi/asm/sigcontext.h b/arch/ia64/include/uapi/asm/sigcontext.h deleted file mode 100644 index 1bb6f0f2bd73..000000000000 --- a/arch/ia64/include/uapi/asm/sigcontext.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_SIGCONTEXT_H -#define _ASM_IA64_SIGCONTEXT_H - -/* - * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co - * Copyright (C) 1998, 1999, 2001 David Mosberger-Tang - */ - -#include - -#define IA64_SC_FLAG_ONSTACK_BIT 0 /* is handler running on signal stack? 
*/ -#define IA64_SC_FLAG_IN_SYSCALL_BIT 1 /* did signal interrupt a syscall? */ -#define IA64_SC_FLAG_FPH_VALID_BIT 2 /* is state in f[32]-f[127] valid? */ - -#define IA64_SC_FLAG_ONSTACK (1 << IA64_SC_FLAG_ONSTACK_BIT) -#define IA64_SC_FLAG_IN_SYSCALL (1 << IA64_SC_FLAG_IN_SYSCALL_BIT) -#define IA64_SC_FLAG_FPH_VALID (1 << IA64_SC_FLAG_FPH_VALID_BIT) - -# ifndef __ASSEMBLY__ - -/* - * Note on handling of register backing store: sc_ar_bsp contains the address that would - * be found in ar.bsp after executing a "cover" instruction the context in which the - * signal was raised. If signal delivery required switching to an alternate signal stack - * (sc_rbs_base is not NULL), the "dirty" partition (as it would exist after executing the - * imaginary "cover" instruction) is backed by the *alternate* signal stack, not the - * original one. In this case, sc_rbs_base contains the base address of the new register - * backing store. The number of registers in the dirty partition can be calculated as: - * - * ndirty = ia64_rse_num_regs(sc_rbs_base, sc_rbs_base + (sc_loadrs >> 16)) - * - */ - -struct sigcontext { - unsigned long sc_flags; /* see manifest constants above */ - unsigned long sc_nat; /* bit i == 1 iff scratch reg gr[i] is a NaT */ - stack_t sc_stack; /* previously active stack */ - - unsigned long sc_ip; /* instruction pointer */ - unsigned long sc_cfm; /* current frame marker */ - unsigned long sc_um; /* user mask bits */ - unsigned long sc_ar_rsc; /* register stack configuration register */ - unsigned long sc_ar_bsp; /* backing store pointer */ - unsigned long sc_ar_rnat; /* RSE NaT collection register */ - unsigned long sc_ar_ccv; /* compare and exchange compare value register */ - unsigned long sc_ar_unat; /* ar.unat of interrupted context */ - unsigned long sc_ar_fpsr; /* floating-point status register */ - unsigned long sc_ar_pfs; /* previous function state */ - unsigned long sc_ar_lc; /* loop count register */ - unsigned long sc_pr; /* predicate registers 
*/ - unsigned long sc_br[8]; /* branch registers */ - /* Note: sc_gr[0] is used as the "uc_link" member of ucontext_t */ - unsigned long sc_gr[32]; /* general registers (static partition) */ - struct ia64_fpreg sc_fr[128]; /* floating-point registers */ - - unsigned long sc_rbs_base; /* NULL or new base of sighandler's rbs */ - unsigned long sc_loadrs; /* see description above */ - - unsigned long sc_ar25; /* cmp8xchg16 uses this */ - unsigned long sc_ar26; /* rsvd for scratch use */ - unsigned long sc_rsvd[12]; /* reserved for future use */ - /* - * The mask must come last so we can increase _NSIG_WORDS - * without breaking binary compatibility. - */ - sigset_t sc_mask; /* signal mask to restore after handler returns */ -}; - -# endif /* __ASSEMBLY__ */ -#endif /* _ASM_IA64_SIGCONTEXT_H */ diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h deleted file mode 100644 index 796af1ccaa7e..000000000000 --- a/arch/ia64/include/uapi/asm/siginfo.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Based on . - * - * Modified 1998-2002 - * David Mosberger-Tang , Hewlett-Packard Co - */ -#ifndef _UAPI_ASM_IA64_SIGINFO_H -#define _UAPI_ASM_IA64_SIGINFO_H - - -#include - -#define si_imm _sifields._sigfault._imm /* as per UNIX SysV ABI spec */ -#define si_flags _sifields._sigfault._flags -/* - * si_isr is valid for SIGILL, SIGFPE, SIGSEGV, SIGBUS, and SIGTRAP provided that - * si_code is non-zero and __ISR_VALID is set in si_flags. 
- */ -#define si_isr _sifields._sigfault._isr - -/* - * Flag values for si_flags: - */ -#define __ISR_VALID_BIT 0 -#define __ISR_VALID (1 << __ISR_VALID_BIT) - -#endif /* _UAPI_ASM_IA64_SIGINFO_H */ diff --git a/arch/ia64/include/uapi/asm/signal.h b/arch/ia64/include/uapi/asm/signal.h deleted file mode 100644 index 63d574e802a2..000000000000 --- a/arch/ia64/include/uapi/asm/signal.h +++ /dev/null @@ -1,98 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Modified 1998-2001, 2003 - * David Mosberger-Tang , Hewlett-Packard Co - * - * Unfortunately, this file is being included by bits/signal.h in - * glibc-2.x. Hence the #ifdef __KERNEL__ ugliness. - */ -#ifndef _UAPI_ASM_IA64_SIGNAL_H -#define _UAPI_ASM_IA64_SIGNAL_H - - -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 -#define SIGBUS 7 -#define SIGFPE 8 -#define SIGKILL 9 -#define SIGUSR1 10 -#define SIGSEGV 11 -#define SIGUSR2 12 -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGSTKFLT 16 -#define SIGCHLD 17 -#define SIGCONT 18 -#define SIGSTOP 19 -#define SIGTSTP 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGURG 23 -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGIO 29 -#define SIGPOLL SIGIO -/* -#define SIGLOST 29 -*/ -#define SIGPWR 30 -#define SIGSYS 31 -/* signal 31 is no longer "unused", but the SIGUNUSED macro remains for backwards compatibility */ -#define SIGUNUSED 31 - -/* These should not be considered constants from userland. */ -#define SIGRTMIN 32 -#define SIGRTMAX _NSIG - -#define SA_RESTORER 0x04000000 - -/* - * The minimum stack size needs to be fairly large because we want to - * be sure that an app compiled for today's CPUs will continue to run - * on all future CPU models. 
The CPU model matters because the signal - * frame needs to have space for the complete machine state, including - * all physical stacked registers. The number of physical stacked - * registers is CPU model dependent, but given that the width of - * ar.rsc.loadrs is 14 bits, we can assume that they'll never take up - * more than 16KB of space. - */ -#if 1 - /* - * This is a stupid typo: the value was _meant_ to be 131072 (0x20000), but I typed it - * in wrong. ;-( To preserve backwards compatibility, we leave the kernel at the - * incorrect value and fix libc only. - */ -# define MINSIGSTKSZ 131027 /* min. stack size for sigaltstack() */ -#else -# define MINSIGSTKSZ 131072 /* min. stack size for sigaltstack() */ -#endif -#define SIGSTKSZ 262144 /* default stack size for sigaltstack() */ - - -#include - -# ifndef __ASSEMBLY__ - -# include - -/* Avoid too many header ordering problems. */ -struct siginfo; - -typedef struct sigaltstack { - void __user *ss_sp; - int ss_flags; - __kernel_size_t ss_size; -} stack_t; - - -# endif /* !__ASSEMBLY__ */ -#endif /* _UAPI_ASM_IA64_SIGNAL_H */ diff --git a/arch/ia64/include/uapi/asm/stat.h b/arch/ia64/include/uapi/asm/stat.h deleted file mode 100644 index 3265ed5aac0f..000000000000 --- a/arch/ia64/include/uapi/asm/stat.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_STAT_H -#define _ASM_IA64_STAT_H - -/* - * Modified 1998, 1999 - * David Mosberger-Tang , Hewlett-Packard Co - */ - -struct stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned int st_mode; - unsigned int st_uid; - unsigned int st_gid; - unsigned int __pad0; - unsigned long st_rdev; - unsigned long st_size; - unsigned long st_atime; - unsigned long st_atime_nsec; - unsigned long st_mtime; - unsigned long st_mtime_nsec; - unsigned long st_ctime; - unsigned long st_ctime_nsec; - unsigned long st_blksize; - long st_blocks; - unsigned long __unused[3]; -}; - 
-#define STAT_HAVE_NSEC 1 - -struct ia64_oldstat { - unsigned int st_dev; - unsigned int st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned int st_uid; - unsigned int st_gid; - unsigned int st_rdev; - unsigned int __pad1; - unsigned long st_size; - unsigned long st_atime; - unsigned long st_mtime; - unsigned long st_ctime; - unsigned int st_blksize; - int st_blocks; - unsigned int __unused1; - unsigned int __unused2; -}; - -#endif /* _ASM_IA64_STAT_H */ diff --git a/arch/ia64/include/uapi/asm/statfs.h b/arch/ia64/include/uapi/asm/statfs.h deleted file mode 100644 index de3bae4f137d..000000000000 --- a/arch/ia64/include/uapi/asm/statfs.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_STATFS_H -#define _ASM_IA64_STATFS_H - -/* - * Based on . - * - * Modified 1998, 1999, 2003 - * David Mosberger-Tang , Hewlett-Packard Co - */ - -/* - * We need compat_statfs64 to be packed, because the i386 ABI won't - * add padding at the end to bring it to a multiple of 8 bytes, but - * the IA64 ABI will. - */ -#define ARCH_PACK_COMPAT_STATFS64 __attribute__((packed,aligned(4))) - -#include - -#endif /* _ASM_IA64_STATFS_H */ diff --git a/arch/ia64/include/uapi/asm/swab.h b/arch/ia64/include/uapi/asm/swab.h deleted file mode 100644 index 79f3fef1a05e..000000000000 --- a/arch/ia64/include/uapi/asm/swab.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_SWAB_H -#define _ASM_IA64_SWAB_H - -/* - * Modified 1998, 1999 - * David Mosberger-Tang , Hewlett-Packard Co. 
- */ - -#include -#include -#include - -static __inline__ __attribute_const__ __u64 __arch_swab64(__u64 x) -{ - __u64 result; - - result = ia64_mux1(x, ia64_mux1_rev); - return result; -} -#define __arch_swab64 __arch_swab64 - -static __inline__ __attribute_const__ __u32 __arch_swab32(__u32 x) -{ - return __arch_swab64(x) >> 32; -} -#define __arch_swab32 __arch_swab32 - -static __inline__ __attribute_const__ __u16 __arch_swab16(__u16 x) -{ - return __arch_swab64(x) >> 48; -} -#define __arch_swab16 __arch_swab16 - -#endif /* _ASM_IA64_SWAB_H */ diff --git a/arch/ia64/include/uapi/asm/types.h b/arch/ia64/include/uapi/asm/types.h deleted file mode 100644 index 2000de474be6..000000000000 --- a/arch/ia64/include/uapi/asm/types.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * This file is never included by application software unless explicitly - * requested (e.g., via linux/types.h) in which case the application is - * Linux specific so (user-) name space pollution is not a major issue. - * However, for interoperability, libraries still need to be careful to - * avoid naming clashes. - * - * Based on . 
- * - * Modified 1998-2000, 2002 - * David Mosberger-Tang , Hewlett-Packard Co - */ -#ifndef _UAPI_ASM_IA64_TYPES_H -#define _UAPI_ASM_IA64_TYPES_H - - -#ifndef __KERNEL__ -#include -#endif - -#ifdef __ASSEMBLY__ -# define __IA64_UL(x) (x) -# define __IA64_UL_CONST(x) x - -#else -# define __IA64_UL(x) ((unsigned long)(x)) -# define __IA64_UL_CONST(x) x##UL - -#endif /* !__ASSEMBLY__ */ - -#endif /* _UAPI_ASM_IA64_TYPES_H */ diff --git a/arch/ia64/include/uapi/asm/ucontext.h b/arch/ia64/include/uapi/asm/ucontext.h deleted file mode 100644 index 46f51e535e04..000000000000 --- a/arch/ia64/include/uapi/asm/ucontext.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_UCONTEXT_H -#define _ASM_IA64_UCONTEXT_H - -struct ucontext { - struct sigcontext uc_mcontext; -}; - -#define uc_link uc_mcontext.sc_gr[0] /* wrong type; nobody cares */ -#define uc_sigmask uc_mcontext.sc_sigmask -#define uc_stack uc_mcontext.sc_stack - -#endif /* _ASM_IA64_UCONTEXT_H */ diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h deleted file mode 100644 index 013e0bcacc39..000000000000 --- a/arch/ia64/include/uapi/asm/unistd.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * IA-64 Linux syscall numbers and inline-functions. 
- * - * Copyright (C) 1998-2005 Hewlett-Packard Co - * David Mosberger-Tang - */ -#ifndef _UAPI_ASM_IA64_UNISTD_H -#define _UAPI_ASM_IA64_UNISTD_H - - -#include - -#define __BREAK_SYSCALL __IA64_BREAK_SYSCALL - -#define __NR_Linux 1024 - -#define __NR_umount __NR_umount2 - -#include - -#endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/include/uapi/asm/ustack.h b/arch/ia64/include/uapi/asm/ustack.h deleted file mode 100644 index 703cc5f546ff..000000000000 --- a/arch/ia64/include/uapi/asm/ustack.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_ASM_IA64_USTACK_H -#define _UAPI_ASM_IA64_USTACK_H - -/* - * Constants for the user stack size - */ - - -/* Make a default stack size of 2GiB */ -#define DEFAULT_USER_STACK_SIZE (1UL << 31) - -#endif /* _UAPI_ASM_IA64_USTACK_H */ diff --git a/arch/ia64/install.sh b/arch/ia64/install.sh deleted file mode 100755 index 2d4b66a9f362..000000000000 --- a/arch/ia64/install.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1995 by Linus Torvalds -# -# Adapted from code in arch/i386/boot/Makefile by H. 
Peter Anvin -# -# "make install" script for ia64 architecture -# -# Arguments: -# $1 - kernel version -# $2 - kernel image file -# $3 - kernel map file -# $4 - default install path (blank if root directory) - -if [ -f $4/vmlinuz ]; then - mv $4/vmlinuz $4/vmlinuz.old -fi - -if [ -f $4/System.map ]; then - mv $4/System.map $4/System.old -fi - -cat $2 > $4/vmlinuz -cp $3 $4/System.map - -test -x /usr/sbin/elilo && /usr/sbin/elilo diff --git a/arch/ia64/kernel/.gitignore b/arch/ia64/kernel/.gitignore deleted file mode 100644 index 0374827206e7..000000000000 --- a/arch/ia64/kernel/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -gate.lds -vmlinux.lds diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile deleted file mode 100644 index d7e1cabee2ec..000000000000 --- a/arch/ia64/kernel/Makefile +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the linux kernel. -# - -ifdef CONFIG_DYNAMIC_FTRACE -CFLAGS_REMOVE_ftrace.o = -pg -endif - -extra-y := vmlinux.lds - -obj-y := head.o entry.o efi.o efi_stub.o gate-data.o fsys.o irq.o irq_ia64.o \ - irq_lsapic.o ivt.o pal.o patch.o process.o ptrace.o sal.o \ - salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ - unwind.o mca.o mca_asm.o topology.o dma-mapping.o iosapic.o acpi.o \ - acpi-ext.o - -obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o - -obj-$(CONFIG_IA64_PALINFO) += palinfo.o -obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_SMP) += smp.o smpboot.o -obj-$(CONFIG_NUMA) += numa.o -obj-$(CONFIG_IA64_CYCLONE) += cyclone.o -obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o -obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o -obj-$(CONFIG_CRASH_DUMP) += crash_dump.o -obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o -obj-$(CONFIG_AUDIT) += audit.o -obj-y += msi_ia64.o -mca_recovery-y += mca_drv.o mca_drv_asm.o -obj-$(CONFIG_IA64_MC_ERR_INJECT)+= 
err_inject.o -obj-$(CONFIG_STACKTRACE) += stacktrace.o - -obj-$(CONFIG_IA64_ESI) += esi.o esi_stub.o # must be in kernel proper -obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o - -obj-$(CONFIG_ELF_CORE) += elfcore.o - -# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. -CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 - -# The gate DSO image is built using a special linker script. -include $(srctree)/$(src)/Makefile.gate diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate deleted file mode 100644 index 846867bff6d6..000000000000 --- a/arch/ia64/kernel/Makefile.gate +++ /dev/null @@ -1,29 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# The gate DSO image is built using a special linker script. - -targets += gate.so gate.lds gate.o gate-dummy.o - -obj-y += gate-syms.o - -CPPFLAGS_gate.lds := -P -C -U$(ARCH) - -quiet_cmd_gate = GATE $@ - cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@ - -GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \ - -Wl,--hash-style=sysv -$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE - $(call if_changed,gate) - -GATECFLAGS_gate-dummy.o = -r -$(obj)/gate-dummy.o: $(obj)/gate.lds $(obj)/gate.o FORCE - $(call if_changed,gate) - -LDFLAGS_gate-syms.o := -r -R -$(obj)/gate-syms.o: $(obj)/gate-dummy.o FORCE - $(call if_changed,ld) - -# gate-data.o contains the gate DSO image as data in section .data..gate. -# We must build gate.so before we can assemble it. -# Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/gate-data.o: $(obj)/gate.so diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c deleted file mode 100644 index 42cd21480833..000000000000 --- a/arch/ia64/kernel/acpi-ext.c +++ /dev/null @@ -1,101 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P. 
- * Alex Williamson - * Bjorn Helgaas - */ - -#include -#include -#include -#include - -#include - -/* - * Device CSRs that do not appear in PCI config space should be described - * via ACPI. This would normally be done with Address Space Descriptors - * marked as "consumer-only," but old versions of Windows and Linux ignore - * the producer/consumer flag, so HP invented a vendor-defined resource to - * describe the location and size of CSR space. - */ - -struct acpi_vendor_uuid hp_ccsr_uuid = { - .subtype = 2, - .data = { 0xf9, 0xad, 0xe9, 0x69, 0x4f, 0x92, 0x5f, 0xab, 0xf6, 0x4a, - 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad }, -}; - -static acpi_status hp_ccsr_locate(acpi_handle obj, u64 *base, u64 *length) -{ - acpi_status status; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - struct acpi_resource *resource; - struct acpi_resource_vendor_typed *vendor; - - status = acpi_get_vendor_resource(obj, METHOD_NAME__CRS, &hp_ccsr_uuid, - &buffer); - - resource = buffer.pointer; - vendor = &resource->data.vendor_typed; - - if (ACPI_FAILURE(status) || vendor->byte_length < 16) { - status = AE_NOT_FOUND; - goto exit; - } - - memcpy(base, vendor->byte_data, sizeof(*base)); - memcpy(length, vendor->byte_data + 8, sizeof(*length)); - - exit: - kfree(buffer.pointer); - return status; -} - -struct csr_space { - u64 base; - u64 length; -}; - -static acpi_status find_csr_space(struct acpi_resource *resource, void *data) -{ - struct csr_space *space = data; - struct acpi_resource_address64 addr; - acpi_status status; - - status = acpi_resource_to_address64(resource, &addr); - if (ACPI_SUCCESS(status) && - addr.resource_type == ACPI_MEMORY_RANGE && - addr.address.address_length && - addr.producer_consumer == ACPI_CONSUMER) { - space->base = addr.address.minimum; - space->length = addr.address.address_length; - return AE_CTRL_TERMINATE; - } - return AE_OK; /* keep looking */ -} - -static acpi_status hp_crs_locate(acpi_handle obj, u64 *base, u64 *length) -{ - struct csr_space 
space = { 0, 0 }; - - acpi_walk_resources(obj, METHOD_NAME__CRS, find_csr_space, &space); - if (!space.length) - return AE_NOT_FOUND; - - *base = space.base; - *length = space.length; - return AE_OK; -} - -acpi_status hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length) -{ - acpi_status status; - - status = hp_ccsr_locate(obj, csr_base, csr_length); - if (ACPI_SUCCESS(status)) - return status; - - return hp_crs_locate(obj, csr_base, csr_length); -} -EXPORT_SYMBOL(hp_acpi_csr_space); diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c deleted file mode 100644 index 41e8fe55cd98..000000000000 --- a/arch/ia64/kernel/acpi.c +++ /dev/null @@ -1,913 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * acpi.c - Architecture-Specific Low-Level ACPI Support - * - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999,2000 Walt Drummond - * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. - * David Mosberger-Tang - * Copyright (C) 2000 Intel Corp. - * Copyright (C) 2000,2001 J.I. Lee - * Copyright (C) 2001 Paul Diefenbaugh - * Copyright (C) 2001 Jenna Hall - * Copyright (C) 2001 Takayoshi Kochi - * Copyright (C) 2002 Erich Focht - * Copyright (C) 2004 Ashok Raj - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define PREFIX "ACPI: " - -int acpi_lapic; -unsigned int acpi_cpei_override; -unsigned int acpi_cpei_phys_cpuid; - -#define ACPI_MAX_PLATFORM_INTERRUPTS 256 - -/* Array to record platform interrupt vectors for generic interrupt routing. */ -int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = { - [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1 -}; - -enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; - -/* - * Interrupt routing API for device drivers. Provides interrupt vector for - * a generic platform event. Currently only CPEI is implemented. 
- */ -int acpi_request_vector(u32 int_type) -{ - int vector = -1; - - if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) { - /* corrected platform error interrupt */ - vector = platform_intr_list[int_type]; - } else - printk(KERN_ERR - "acpi_request_vector(): invalid interrupt type\n"); - return vector; -} - -void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size) -{ - return __va(phys); -} - -void __init __acpi_unmap_table(void __iomem *map, unsigned long size) -{ -} - -/* -------------------------------------------------------------------------- - Boot-time Table Parsing - -------------------------------------------------------------------------- */ - -static int available_cpus __initdata; -struct acpi_table_madt *acpi_madt __initdata; -static u8 has_8259; - -static int __init -acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header, - const unsigned long end) -{ - struct acpi_madt_local_apic_override *lapic; - - lapic = (struct acpi_madt_local_apic_override *)header; - - if (BAD_MADT_ENTRY(lapic, end)) - return -EINVAL; - - if (lapic->address) { - iounmap(ipi_base_addr); - ipi_base_addr = ioremap(lapic->address, 0); - } - return 0; -} - -static int __init -acpi_parse_lsapic(union acpi_subtable_headers *header, const unsigned long end) -{ - struct acpi_madt_local_sapic *lsapic; - - lsapic = (struct acpi_madt_local_sapic *)header; - - /*Skip BAD_MADT_ENTRY check, as lsapic size could vary */ - - if (lsapic->lapic_flags & ACPI_MADT_ENABLED) { -#ifdef CONFIG_SMP - smp_boot_data.cpu_phys_id[available_cpus] = - (lsapic->id << 8) | lsapic->eid; -#endif - ++available_cpus; - } - - total_cpus++; - return 0; -} - -static int __init -acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end) -{ - struct acpi_madt_local_apic_nmi *lacpi_nmi; - - lacpi_nmi = (struct acpi_madt_local_apic_nmi *)header; - - if (BAD_MADT_ENTRY(lacpi_nmi, end)) - return -EINVAL; - - /* TBD: Support lapic_nmi entries */ - return 0; -} - -static int 
__init -acpi_parse_iosapic(union acpi_subtable_headers * header, const unsigned long end) -{ - struct acpi_madt_io_sapic *iosapic; - - iosapic = (struct acpi_madt_io_sapic *)header; - - if (BAD_MADT_ENTRY(iosapic, end)) - return -EINVAL; - - return iosapic_init(iosapic->address, iosapic->global_irq_base); -} - -static unsigned int __initdata acpi_madt_rev; - -static int __init -acpi_parse_plat_int_src(union acpi_subtable_headers * header, - const unsigned long end) -{ - struct acpi_madt_interrupt_source *plintsrc; - int vector; - - plintsrc = (struct acpi_madt_interrupt_source *)header; - - if (BAD_MADT_ENTRY(plintsrc, end)) - return -EINVAL; - - /* - * Get vector assignment for this interrupt, set attributes, - * and program the IOSAPIC routing table. - */ - vector = iosapic_register_platform_intr(plintsrc->type, - plintsrc->global_irq, - plintsrc->io_sapic_vector, - plintsrc->eid, - plintsrc->id, - ((plintsrc->inti_flags & ACPI_MADT_POLARITY_MASK) == - ACPI_MADT_POLARITY_ACTIVE_HIGH) ? - IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, - ((plintsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) == - ACPI_MADT_TRIGGER_EDGE) ? - IOSAPIC_EDGE : IOSAPIC_LEVEL); - - platform_intr_list[plintsrc->type] = vector; - if (acpi_madt_rev > 1) { - acpi_cpei_override = plintsrc->flags & ACPI_MADT_CPEI_OVERRIDE; - } - - /* - * Save the physical id, so we can check when its being removed - */ - acpi_cpei_phys_cpuid = ((plintsrc->id << 8) | (plintsrc->eid)) & 0xffff; - - return 0; -} - -#ifdef CONFIG_HOTPLUG_CPU -unsigned int can_cpei_retarget(void) -{ - extern int cpe_vector; - extern unsigned int force_cpei_retarget; - - /* - * Only if CPEI is supported and the override flag - * is present, otherwise return that its re-targettable - * if we are in polling mode. 
- */ - if (cpe_vector > 0) { - if (acpi_cpei_override || force_cpei_retarget) - return 1; - else - return 0; - } - return 1; -} - -unsigned int is_cpu_cpei_target(unsigned int cpu) -{ - unsigned int logical_id; - - logical_id = cpu_logical_id(acpi_cpei_phys_cpuid); - - if (logical_id == cpu) - return 1; - else - return 0; -} - -void set_cpei_target_cpu(unsigned int cpu) -{ - acpi_cpei_phys_cpuid = cpu_physical_id(cpu); -} -#endif - -unsigned int get_cpei_target_cpu(void) -{ - return acpi_cpei_phys_cpuid; -} - -static int __init -acpi_parse_int_src_ovr(union acpi_subtable_headers * header, - const unsigned long end) -{ - struct acpi_madt_interrupt_override *p; - - p = (struct acpi_madt_interrupt_override *)header; - - if (BAD_MADT_ENTRY(p, end)) - return -EINVAL; - - iosapic_override_isa_irq(p->source_irq, p->global_irq, - ((p->inti_flags & ACPI_MADT_POLARITY_MASK) == - ACPI_MADT_POLARITY_ACTIVE_LOW) ? - IOSAPIC_POL_LOW : IOSAPIC_POL_HIGH, - ((p->inti_flags & ACPI_MADT_TRIGGER_MASK) == - ACPI_MADT_TRIGGER_LEVEL) ? - IOSAPIC_LEVEL : IOSAPIC_EDGE); - return 0; -} - -static int __init -acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end) -{ - struct acpi_madt_nmi_source *nmi_src; - - nmi_src = (struct acpi_madt_nmi_source *)header; - - if (BAD_MADT_ENTRY(nmi_src, end)) - return -EINVAL; - - /* TBD: Support nimsrc entries */ - return 0; -} - -static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERMOW", 6))) { - - /* - * Unfortunately ITC_DRIFT is not yet part of the - * official SAL spec, so the ITC_DRIFT bit is not - * set by the BIOS on this hardware. 
- */ - sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; - - cyclone_setup(); - } -} - -static int __init acpi_parse_madt(struct acpi_table_header *table) -{ - acpi_madt = (struct acpi_table_madt *)table; - - acpi_madt_rev = acpi_madt->header.revision; - - /* remember the value for reference after free_initmem() */ -#ifdef CONFIG_ITANIUM - has_8259 = 1; /* Firmware on old Itanium systems is broken */ -#else - has_8259 = acpi_madt->flags & ACPI_MADT_PCAT_COMPAT; -#endif - iosapic_system_init(has_8259); - - /* Get base address of IPI Message Block */ - - if (acpi_madt->address) - ipi_base_addr = ioremap(acpi_madt->address, 0); - - printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr); - - acpi_madt_oem_check(acpi_madt->header.oem_id, - acpi_madt->header.oem_table_id); - - return 0; -} - -#ifdef CONFIG_ACPI_NUMA - -#undef SLIT_DEBUG - -#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32) - -static int __initdata srat_num_cpus; /* number of cpus */ -static u32 pxm_flag[PXM_FLAG_LEN]; -#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) -#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) -static struct acpi_table_slit __initdata *slit_table; -cpumask_t early_cpu_possible_map = CPU_MASK_NONE; - -static int __init -get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) -{ - int pxm; - - pxm = pa->proximity_domain_lo; - if (acpi_srat_revision >= 2) - pxm += pa->proximity_domain_hi[0] << 8; - return pxm; -} - -static int __init -get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma) -{ - int pxm; - - pxm = ma->proximity_domain; - if (acpi_srat_revision <= 1) - pxm &= 0xff; - - return pxm; -} - -/* - * ACPI 2.0 SLIT (System Locality Information Table) - * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf - */ -void __init acpi_numa_slit_init(struct acpi_table_slit *slit) -{ - u32 len; - - len = sizeof(struct acpi_table_header) + 8 - + slit->locality_count * slit->locality_count; - if (slit->header.length != 
len) { - printk(KERN_ERR - "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", - len, slit->header.length); - return; - } - slit_table = slit; -} - -void __init -acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) -{ - int pxm; - - if (!(pa->flags & ACPI_SRAT_CPU_ENABLED)) - return; - - if (srat_num_cpus >= ARRAY_SIZE(node_cpuid)) { - printk_once(KERN_WARNING - "node_cpuid[%ld] is too small, may not be able to use all cpus\n", - ARRAY_SIZE(node_cpuid)); - return; - } - pxm = get_processor_proximity_domain(pa); - - /* record this node in proximity bitmap */ - pxm_bit_set(pxm); - - node_cpuid[srat_num_cpus].phys_id = - (pa->apic_id << 8) | (pa->local_sapic_eid); - /* nid should be overridden as logical node id later */ - node_cpuid[srat_num_cpus].nid = pxm; - cpumask_set_cpu(srat_num_cpus, &early_cpu_possible_map); - srat_num_cpus++; -} - -int __init -acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) -{ - unsigned long paddr, size; - int pxm; - struct node_memblk_s *p, *q, *pend; - - pxm = get_memory_proximity_domain(ma); - - /* fill node memory chunk structure */ - paddr = ma->base_address; - size = ma->length; - - /* Ignore disabled entries */ - if (!(ma->flags & ACPI_SRAT_MEM_ENABLED)) - return -1; - - if (num_node_memblks >= NR_NODE_MEMBLKS) { - pr_err("NUMA: too many memblk ranges\n"); - return -EINVAL; - } - - /* record this node in proximity bitmap */ - pxm_bit_set(pxm); - - /* Insertion sort based on base address */ - pend = &node_memblk[num_node_memblks]; - for (p = &node_memblk[0]; p < pend; p++) { - if (paddr < p->start_paddr) - break; - } - if (p < pend) { - for (q = pend - 1; q >= p; q--) - *(q + 1) = *q; - } - p->start_paddr = paddr; - p->size = size; - p->nid = pxm; - num_node_memblks++; - return 0; -} - -void __init acpi_numa_fixup(void) -{ - int i, j, node_from, node_to; - - /* If there's no SRAT, fix the phys_id and mark node 0 online */ - if (srat_num_cpus == 0) { - node_set_online(0); - node_cpuid[0].phys_id 
= hard_smp_processor_id(); - slit_distance(0, 0) = LOCAL_DISTANCE; - goto out; - } - - /* - * MCD - This can probably be dropped now. No need for pxm ID to node ID - * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES. - */ - nodes_clear(node_online_map); - for (i = 0; i < MAX_PXM_DOMAINS; i++) { - if (pxm_bit_test(i)) { - int nid = acpi_map_pxm_to_node(i); - node_set_online(nid); - } - } - - /* set logical node id in memory chunk structure */ - for (i = 0; i < num_node_memblks; i++) - node_memblk[i].nid = pxm_to_node(node_memblk[i].nid); - - /* assign memory bank numbers for each chunk on each node */ - for_each_online_node(i) { - int bank; - - bank = 0; - for (j = 0; j < num_node_memblks; j++) - if (node_memblk[j].nid == i) - node_memblk[j].bank = bank++; - } - - /* set logical node id in cpu structure */ - for_each_possible_early_cpu(i) - node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); - - printk(KERN_INFO "Number of logical nodes in system = %d\n", - num_online_nodes()); - printk(KERN_INFO "Number of memory chunks in system = %d\n", - num_node_memblks); - - if (!slit_table) { - for (i = 0; i < MAX_NUMNODES; i++) - for (j = 0; j < MAX_NUMNODES; j++) - slit_distance(i, j) = i == j ? 
- LOCAL_DISTANCE : REMOTE_DISTANCE; - goto out; - } - - memset(numa_slit, -1, sizeof(numa_slit)); - for (i = 0; i < slit_table->locality_count; i++) { - if (!pxm_bit_test(i)) - continue; - node_from = pxm_to_node(i); - for (j = 0; j < slit_table->locality_count; j++) { - if (!pxm_bit_test(j)) - continue; - node_to = pxm_to_node(j); - slit_distance(node_from, node_to) = - slit_table->entry[i * slit_table->locality_count + j]; - } - } - -#ifdef SLIT_DEBUG - printk("ACPI 2.0 SLIT locality table:\n"); - for_each_online_node(i) { - for_each_online_node(j) - printk("%03d ", node_distance(i, j)); - printk("\n"); - } -#endif -out: - node_possible_map = node_online_map; -} -#endif /* CONFIG_ACPI_NUMA */ - -/* - * success: return IRQ number (>=0) - * failure: return < 0 - */ -int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) -{ - if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) - return gsi; - - if (has_8259 && gsi < 16) - return isa_irq_to_vector(gsi); - - return iosapic_register_intr(gsi, - (polarity == - ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : - IOSAPIC_POL_LOW, - (triggering == - ACPI_EDGE_SENSITIVE) ? 
IOSAPIC_EDGE : - IOSAPIC_LEVEL); -} -EXPORT_SYMBOL_GPL(acpi_register_gsi); - -void acpi_unregister_gsi(u32 gsi) -{ - if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) - return; - - if (has_8259 && gsi < 16) - return; - - iosapic_unregister_intr(gsi); -} -EXPORT_SYMBOL_GPL(acpi_unregister_gsi); - -static int __init acpi_parse_fadt(struct acpi_table_header *table) -{ - struct acpi_table_header *fadt_header; - struct acpi_table_fadt *fadt; - - fadt_header = (struct acpi_table_header *)table; - if (fadt_header->revision != 3) - return -ENODEV; /* Only deal with ACPI 2.0 FADT */ - - fadt = (struct acpi_table_fadt *)fadt_header; - - acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, - ACPI_ACTIVE_LOW); - return 0; -} - -int __init early_acpi_boot_init(void) -{ - int ret; - - /* - * do a partial walk of MADT to determine how many CPUs - * we have including offline CPUs - */ - if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { - printk(KERN_ERR PREFIX "Can't find MADT\n"); - return 0; - } - - ret = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, - acpi_parse_lsapic, NR_CPUS); - if (ret < 1) - printk(KERN_ERR PREFIX - "Error parsing MADT - no LAPIC entries\n"); - else - acpi_lapic = 1; - -#ifdef CONFIG_SMP - if (available_cpus == 0) { - printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); - printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id()); - smp_boot_data.cpu_phys_id[available_cpus] = - hard_smp_processor_id(); - available_cpus = 1; /* We've got at least one of these, no? */ - } - smp_boot_data.cpu_count = available_cpus; -#endif - /* Make boot-up look pretty */ - printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, - total_cpus); - - return 0; -} - -int __init acpi_boot_init(void) -{ - - /* - * MADT - * ---- - * Parse the Multiple APIC Description Table (MADT), if exists. - * Note that this table provides platform SMP configuration - * information -- the successor to MPS tables. 
- */ - - if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { - printk(KERN_ERR PREFIX "Can't find MADT\n"); - goto skip_madt; - } - - /* Local APIC */ - - if (acpi_table_parse_madt - (ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, acpi_parse_lapic_addr_ovr, 0) < 0) - printk(KERN_ERR PREFIX - "Error parsing LAPIC address override entry\n"); - - if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0) - < 0) - printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); - - /* I/O APIC */ - - if (acpi_table_parse_madt - (ACPI_MADT_TYPE_IO_SAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) { - printk(KERN_ERR PREFIX - "Error parsing MADT - no IOSAPIC entries\n"); - } - - /* System-Level Interrupt Routing */ - - if (acpi_table_parse_madt - (ACPI_MADT_TYPE_INTERRUPT_SOURCE, acpi_parse_plat_int_src, - ACPI_MAX_PLATFORM_INTERRUPTS) < 0) - printk(KERN_ERR PREFIX - "Error parsing platform interrupt source entry\n"); - - if (acpi_table_parse_madt - (ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, 0) < 0) - printk(KERN_ERR PREFIX - "Error parsing interrupt source overrides entry\n"); - - if (acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, 0) < 0) - printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); - skip_madt: - - /* - * FADT says whether a legacy keyboard controller is present. - * The FADT also contains an SCI_INT line, by which the system - * gets interrupts such as power and sleep buttons. If it's not - * on a Legacy interrupt, it needs to be setup. 
- */ - if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt)) - printk(KERN_ERR PREFIX "Can't find FADT\n"); - -#ifdef CONFIG_ACPI_NUMA -#ifdef CONFIG_SMP - if (srat_num_cpus == 0) { - int cpu, i = 1; - for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) - if (smp_boot_data.cpu_phys_id[cpu] != - hard_smp_processor_id()) - node_cpuid[i++].phys_id = - smp_boot_data.cpu_phys_id[cpu]; - } -#endif - build_cpu_to_node_map(); -#endif - return 0; -} - -int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) -{ - int tmp; - - if (has_8259 && gsi < 16) - *irq = isa_irq_to_vector(gsi); - else { - tmp = gsi_to_irq(gsi); - if (tmp == -1) - return -1; - *irq = tmp; - } - return 0; -} - -int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) -{ - if (isa_irq >= 16) - return -1; - *gsi = isa_irq; - return 0; -} - -/* - * ACPI based hotplug CPU support - */ -#ifdef CONFIG_ACPI_HOTPLUG_CPU -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) -{ -#ifdef CONFIG_ACPI_NUMA - /* - * We don't have cpu-only-node hotadd. But if the system equips - * SRAT table, pxm is already found and node is ready. - * So, just pxm_to_nid(pxm) is OK. - * This code here is for the system which doesn't have full SRAT - * table for possible cpus. - */ - node_cpuid[cpu].phys_id = physid; - node_cpuid[cpu].nid = acpi_get_node(handle); -#endif - return 0; -} - -int additional_cpus __initdata = -1; - -static __init int setup_additional_cpus(char *s) -{ - if (s) - additional_cpus = simple_strtol(s, NULL, 0); - - return 0; -} - -early_param("additional_cpus", setup_additional_cpus); - -/* - * cpu_possible_mask should be static, it cannot change as CPUs - * are onlined, or offlined. The reason is per-cpu data-structures - * are allocated by some modules at init time, and dont expect to - * do this dynamically on cpu arrival/departure. - * cpu_present_mask on the other hand can change dynamically. 
- * In case when cpu_hotplug is not compiled, then we resort to current - * behaviour, which is cpu_possible == cpu_present. - * - Ashok Raj - * - * Three ways to find out the number of additional hotplug CPUs: - * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with additional_cpus=NUM - * - Otherwise don't reserve additional CPUs. - */ -__init void prefill_possible_map(void) -{ - int i; - int possible, disabled_cpus; - - disabled_cpus = total_cpus - available_cpus; - - if (additional_cpus == -1) { - if (disabled_cpus > 0) - additional_cpus = disabled_cpus; - else - additional_cpus = 0; - } - - possible = available_cpus + additional_cpus; - - if (possible > nr_cpu_ids) - possible = nr_cpu_ids; - - printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", - possible, max((possible - available_cpus), 0)); - - for (i = 0; i < possible; i++) - set_cpu_possible(i, true); -} - -static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) -{ - int cpu; - - cpu = cpumask_first_zero(cpu_present_mask); - if (cpu >= nr_cpu_ids) - return -EINVAL; - - acpi_map_cpu2node(handle, cpu, physid); - - set_cpu_present(cpu, true); - ia64_cpu_to_sapicid[cpu] = physid; - - acpi_processor_set_pdc(handle); - - *pcpu = cpu; - return (0); -} - -/* wrapper to silence section mismatch warning */ -int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, - int *pcpu) -{ - return _acpi_map_lsapic(handle, physid, pcpu); -} -EXPORT_SYMBOL(acpi_map_cpu); - -int acpi_unmap_cpu(int cpu) -{ - ia64_cpu_to_sapicid[cpu] = -1; - set_cpu_present(cpu, false); - -#ifdef CONFIG_ACPI_NUMA - /* NUMA specific cleanup's */ -#endif - - return (0); -} -EXPORT_SYMBOL(acpi_unmap_cpu); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - -#ifdef CONFIG_ACPI_NUMA -static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth, - void *context, void **ret) -{ - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - 
struct acpi_madt_io_sapic *iosapic; - unsigned int gsi_base; - int node; - - /* Only care about objects w/ a method that returns the MADT */ - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) - return AE_OK; - - if (!buffer.length || !buffer.pointer) - return AE_OK; - - obj = buffer.pointer; - if (obj->type != ACPI_TYPE_BUFFER || - obj->buffer.length < sizeof(*iosapic)) { - kfree(buffer.pointer); - return AE_OK; - } - - iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer; - - if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) { - kfree(buffer.pointer); - return AE_OK; - } - - gsi_base = iosapic->global_irq_base; - - kfree(buffer.pointer); - - /* OK, it's an IOSAPIC MADT entry; associate it with a node */ - node = acpi_get_node(handle); - if (node == NUMA_NO_NODE || !node_online(node) || - cpumask_empty(cpumask_of_node(node))) - return AE_OK; - - /* We know a gsi to node mapping! */ - map_iosapic_to_node(gsi_base, node); - return AE_OK; -} - -static int __init -acpi_map_iosapics (void) -{ - acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL); - return 0; -} - -fs_initcall(acpi_map_iosapics); -#endif /* CONFIG_ACPI_NUMA */ - -int __ref acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) -{ - int err; - - if ((err = iosapic_init(phys_addr, gsi_base))) - return err; - -#ifdef CONFIG_ACPI_NUMA - acpi_map_iosapic(handle, 0, NULL, NULL); -#endif /* CONFIG_ACPI_NUMA */ - - return 0; -} - -EXPORT_SYMBOL(acpi_register_ioapic); - -int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) -{ - return iosapic_remove(gsi_base); -} - -EXPORT_SYMBOL(acpi_unregister_ioapic); - -/* - * acpi_suspend_lowlevel() - save kernel state and suspend. - * - * TBD when IA64 starts to support suspend... 
- */ -int acpi_suspend_lowlevel(void) { return 0; } - -void acpi_proc_quirk_mwait_check(void) -{ -} diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c deleted file mode 100644 index be3b90fef2e9..000000000000 --- a/arch/ia64/kernel/asm-offsets.c +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed - * to extract and format the required data. - */ - -#define ASM_OFFSETS_C 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../kernel/sigframe.h" -#include "../kernel/fsyscall_gtod_data.h" - -void foo(void) -{ - DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct)); - DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info)); - DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs)); - DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack)); - DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo)); - DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64)); - DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); - DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); - - BUILD_BUG_ON(sizeof(struct upid) != 16); - DEFINE(IA64_UPID_SHIFT, 4); - - BLANK(); - - DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); - DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp)); - DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave)); - DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime)); - DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime)); -#endif - - BLANK(); - - DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); - DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); - DEFINE(IA64_TASK_THREAD_PID_OFFSET,offsetof 
(struct task_struct, thread_pid)); - DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level)); - DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0])); - DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); - DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); - DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); - DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); - DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); - DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); - DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); - - BLANK(); - - - DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct, - group_stop_count)); - DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending)); - DEFINE(IA64_SIGNAL_PIDS_TGID_OFFSET, offsetof (struct signal_struct, pids[PIDTYPE_TGID])); - - BLANK(); - - DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6)); - DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7)); - DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd)); - DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd)); - DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8)); - DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9)); - DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10)); - DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11)); - DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr)); - DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip)); - DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs)); - DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat)); - DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs)); - DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, 
ar_rsc)); - DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat)); - - DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore)); - DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr)); - DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0)); - DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs)); - DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1)); - DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12)); - DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13)); - DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr)); - DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15)); - DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14)); - DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2)); - DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3)); - DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16)); - DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17)); - DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18)); - DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19)); - DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20)); - DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21)); - DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22)); - DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23)); - DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24)); - DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25)); - DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26)); - DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27)); - DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28)); - DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29)); - DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30)); - DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31)); - 
DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv)); - DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6)); - DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7)); - DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8)); - DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9)); - DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10)); - DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11)); - - BLANK(); - - DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat)); - DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr)); - DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2)); - DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3)); - DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4)); - DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5)); - DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12)); - DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13)); - DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14)); - DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15)); - DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16)); - DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17)); - DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18)); - DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19)); - DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20)); - DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21)); - DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22)); - DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23)); - DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24)); - DEFINE(IA64_SWITCH_STACK_F25_OFFSET, 
offsetof (struct switch_stack, f25)); - DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26)); - DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27)); - DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28)); - DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29)); - DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30)); - DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31)); - DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4)); - DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5)); - DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6)); - DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7)); - DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0)); - DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1)); - DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2)); - DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3)); - DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4)); - DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5)); - DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs)); - DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc)); - DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat)); - DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat)); - DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore)); - DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr)); - - BLANK(); - - DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip)); - DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp)); - DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, 
sc_ar_fpsr)); - DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat)); - DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat)); - DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0])); - DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm)); - DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags)); - DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6])); - DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr)); - DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12])); - DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base)); - DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs)); - - BLANK(); - - DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal)); - - BLANK(); - - DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0)); - DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1)); - DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2)); - DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler)); - DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc)); - BLANK(); - /* for assembly files which can't include sched.h: */ - DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); - DEFINE(IA64_CLONE_VM, CLONE_VM); - - BLANK(); - DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, - offsetof (struct cpuinfo_ia64, nsec_per_cyc)); - DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET, - offsetof (struct cpuinfo_ia64, ptce_base)); - DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET, - offsetof (struct cpuinfo_ia64, ptce_count)); - DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET, - offsetof (struct cpuinfo_ia64, ptce_stride)); - BLANK(); - DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, - offsetof (struct __kernel_old_timespec, tv_nsec)); - DEFINE(IA64_TIME_SN_SPEC_SNSEC_OFFSET, - offsetof (struct time_sn_spec, snsec)); - - 
DEFINE(CLONE_SETTLS_BIT, 19); -#if CLONE_SETTLS != (1<<19) -# error "CLONE_SETTLS_BIT incorrect, please fix" -#endif - - BLANK(); - DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET, - offsetof (struct ia64_mca_cpu, mca_stack)); - DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, - offsetof (struct ia64_mca_cpu, init_stack)); - BLANK(); - DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET, - offsetof (struct ia64_sal_os_state, os_gp)); - DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, - offsetof (struct ia64_sal_os_state, proc_state_param)); - DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET, - offsetof (struct ia64_sal_os_state, sal_ra)); - DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET, - offsetof (struct ia64_sal_os_state, sal_gp)); - DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, - offsetof (struct ia64_sal_os_state, pal_min_state)); - DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET, - offsetof (struct ia64_sal_os_state, os_status)); - DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET, - offsetof (struct ia64_sal_os_state, context)); - DEFINE(IA64_SAL_OS_STATE_SIZE, - sizeof (struct ia64_sal_os_state)); - BLANK(); - - DEFINE(IA64_PMSA_GR_OFFSET, - offsetof(struct pal_min_state_area, pmsa_gr)); - DEFINE(IA64_PMSA_BANK1_GR_OFFSET, - offsetof(struct pal_min_state_area, pmsa_bank1_gr)); - DEFINE(IA64_PMSA_PR_OFFSET, - offsetof(struct pal_min_state_area, pmsa_pr)); - DEFINE(IA64_PMSA_BR0_OFFSET, - offsetof(struct pal_min_state_area, pmsa_br0)); - DEFINE(IA64_PMSA_RSC_OFFSET, - offsetof(struct pal_min_state_area, pmsa_rsc)); - DEFINE(IA64_PMSA_IIP_OFFSET, - offsetof(struct pal_min_state_area, pmsa_iip)); - DEFINE(IA64_PMSA_IPSR_OFFSET, - offsetof(struct pal_min_state_area, pmsa_ipsr)); - DEFINE(IA64_PMSA_IFS_OFFSET, - offsetof(struct pal_min_state_area, pmsa_ifs)); - DEFINE(IA64_PMSA_XIP_OFFSET, - offsetof(struct pal_min_state_area, pmsa_xip)); - BLANK(); - - /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */ - DEFINE(IA64_GTOD_SEQ_OFFSET, - offsetof (struct fsyscall_gtod_data_t, seq)); - DEFINE(IA64_GTOD_WALL_TIME_OFFSET, - 
offsetof (struct fsyscall_gtod_data_t, wall_time)); - DEFINE(IA64_GTOD_MONO_TIME_OFFSET, - offsetof (struct fsyscall_gtod_data_t, monotonic_time)); - DEFINE(IA64_CLKSRC_MASK_OFFSET, - offsetof (struct fsyscall_gtod_data_t, clk_mask)); - DEFINE(IA64_CLKSRC_MULT_OFFSET, - offsetof (struct fsyscall_gtod_data_t, clk_mult)); - DEFINE(IA64_CLKSRC_SHIFT_OFFSET, - offsetof (struct fsyscall_gtod_data_t, clk_shift)); - DEFINE(IA64_CLKSRC_MMIO_OFFSET, - offsetof (struct fsyscall_gtod_data_t, clk_fsys_mmio)); - DEFINE(IA64_CLKSRC_CYCLE_LAST_OFFSET, - offsetof (struct fsyscall_gtod_data_t, clk_cycle_last)); - DEFINE(IA64_ITC_JITTER_OFFSET, - offsetof (struct itc_jitter_data_t, itc_jitter)); - DEFINE(IA64_ITC_LASTCYCLE_OFFSET, - offsetof (struct itc_jitter_data_t, itc_lastcycle)); - -} diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c deleted file mode 100644 index ec61f20ca61f..000000000000 --- a/arch/ia64/kernel/audit.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -static unsigned dir_class[] = { -#include -~0U -}; - -static unsigned read_class[] = { -#include -~0U -}; - -static unsigned write_class[] = { -#include -~0U -}; - -static unsigned chattr_class[] = { -#include -~0U -}; - -static unsigned signal_class[] = { -#include -~0U -}; - -int audit_classify_arch(int arch) -{ - return 0; -} - -int audit_classify_syscall(int abi, unsigned syscall) -{ - switch(syscall) { - case __NR_open: - return AUDITSC_OPEN; - case __NR_openat: - return AUDITSC_OPENAT; - case __NR_execve: - return AUDITSC_EXECVE; - case __NR_openat2: - return AUDITSC_OPENAT2; - default: - return AUDITSC_NATIVE; - } -} - -static int __init audit_classes_init(void) -{ - audit_register_class(AUDIT_CLASS_WRITE, write_class); - audit_register_class(AUDIT_CLASS_READ, read_class); - audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); - audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); - audit_register_class(AUDIT_CLASS_SIGNAL, 
signal_class); - return 0; -} - -__initcall(audit_classes_init); diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c deleted file mode 100644 index 782c481d7052..000000000000 --- a/arch/ia64/kernel/brl_emu.c +++ /dev/null @@ -1,217 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Emulation of the "brl" instruction for IA64 processors that - * don't support it in hardware. - * Author: Stephan Zeisset, Intel Corp. - * - * 02/22/02 D. Mosberger Clear si_flgs, si_isr, and si_imm to avoid - * leaking kernel bits. - */ - -#include -#include -#include -#include - -extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5; - -struct illegal_op_return { - unsigned long fkt, arg1, arg2, arg3; -}; - -/* - * The unimplemented bits of a virtual address must be set - * to the value of the most significant implemented bit. - * unimpl_va_mask includes all unimplemented bits and - * the most significant implemented bit, so the result - * of an and operation with the mask must be all 0's - * or all 1's for the address to be valid. - */ -#define unimplemented_virtual_address(va) ( \ - ((va) & local_cpu_data->unimpl_va_mask) != 0 && \ - ((va) & local_cpu_data->unimpl_va_mask) != local_cpu_data->unimpl_va_mask \ -) - -/* - * The unimplemented bits of a physical address must be 0. - * unimpl_pa_mask includes all unimplemented bits, so the result - * of an and operation with the mask must be all 0's for the - * address to be valid. - */ -#define unimplemented_physical_address(pa) ( \ - ((pa) & local_cpu_data->unimpl_pa_mask) != 0 \ -) - -/* - * Handle an illegal operation fault that was caused by an - * unimplemented "brl" instruction. - * If we are not successful (e.g because the illegal operation - * wasn't caused by a "brl" after all), we return -1. - * If we are successful, we return either 0 or the address - * of a "fixup" function for manipulating preserved register - * state. 
- */ - -struct illegal_op_return -ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec) -{ - unsigned long bundle[2]; - unsigned long opcode, btype, qp, offset, cpl; - unsigned long next_ip; - struct illegal_op_return rv; - long tmp_taken, unimplemented_address; - - rv.fkt = (unsigned long) -1; - - /* - * Decode the instruction bundle. - */ - - if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle))) - return rv; - - next_ip = (unsigned long) regs->cr_iip + 16; - - /* "brl" must be in slot 2. */ - if (ia64_psr(regs)->ri != 1) return rv; - - /* Must be "mlx" template */ - if ((bundle[0] & 0x1e) != 0x4) return rv; - - opcode = (bundle[1] >> 60); - btype = ((bundle[1] >> 29) & 0x7); - qp = ((bundle[1] >> 23) & 0x3f); - offset = ((bundle[1] & 0x0800000000000000L) << 4) - | ((bundle[1] & 0x00fffff000000000L) >> 32) - | ((bundle[1] & 0x00000000007fffffL) << 40) - | ((bundle[0] & 0xffff000000000000L) >> 24); - - tmp_taken = regs->pr & (1L << qp); - - switch(opcode) { - - case 0xC: - /* - * Long Branch. - */ - if (btype != 0) return rv; - rv.fkt = 0; - if (!(tmp_taken)) { - /* - * Qualifying predicate is 0. - * Skip instruction. - */ - regs->cr_iip = next_ip; - ia64_psr(regs)->ri = 0; - return rv; - } - break; - - case 0xD: - /* - * Long Call. - */ - rv.fkt = 0; - if (!(tmp_taken)) { - /* - * Qualifying predicate is 0. - * Skip instruction. 
- */ - regs->cr_iip = next_ip; - ia64_psr(regs)->ri = 0; - return rv; - } - - /* - * BR[btype] = IP+16 - */ - switch(btype) { - case 0: - regs->b0 = next_ip; - break; - case 1: - rv.fkt = (unsigned long) &ia64_set_b1; - break; - case 2: - rv.fkt = (unsigned long) &ia64_set_b2; - break; - case 3: - rv.fkt = (unsigned long) &ia64_set_b3; - break; - case 4: - rv.fkt = (unsigned long) &ia64_set_b4; - break; - case 5: - rv.fkt = (unsigned long) &ia64_set_b5; - break; - case 6: - regs->b6 = next_ip; - break; - case 7: - regs->b7 = next_ip; - break; - } - rv.arg1 = next_ip; - - /* - * AR[PFS].pfm = CFM - * AR[PFS].pec = AR[EC] - * AR[PFS].ppl = PSR.cpl - */ - cpl = ia64_psr(regs)->cpl; - regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff) - | (ar_ec << 52) | (cpl << 62)); - - /* - * CFM.sof -= CFM.sol - * CFM.sol = 0 - * CFM.sor = 0 - * CFM.rrb.gr = 0 - * CFM.rrb.fr = 0 - * CFM.rrb.pr = 0 - */ - regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f) - - ((regs->cr_ifs >> 7) & 0x7f)); - - break; - - default: - /* - * Unknown opcode. - */ - return rv; - - } - - regs->cr_iip += offset; - ia64_psr(regs)->ri = 0; - - if (ia64_psr(regs)->it == 0) - unimplemented_address = unimplemented_physical_address(regs->cr_iip); - else - unimplemented_address = unimplemented_virtual_address(regs->cr_iip); - - if (unimplemented_address) { - /* - * The target address contains unimplemented bits. - */ - printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n"); - force_sig_fault(SIGILL, ILL_BADIADDR, (void __user *)NULL, - 0, 0, 0); - } else if (ia64_psr(regs)->tb) { - /* - * Branch Tracing is enabled. - * Force a taken branch signal. - */ - force_sig_fault(SIGTRAP, TRAP_BRANCH, (void __user *)NULL, - 0, 0, 0); - } else if (ia64_psr(regs)->ss) { - /* - * Single Step is enabled. - * Force a trace signal. 
- */ - force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)NULL, - 0, 0, 0); - } - return rv; -} diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c deleted file mode 100644 index 88b3ce3e66cd..000000000000 --- a/arch/ia64/kernel/crash.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * arch/ia64/kernel/crash.c - * - * Architecture specific (ia64) functions for kexec based crash dumps. - * - * Created by: Khalid Aziz - * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. - * Copyright (C) 2005 Intel Corp Zou Nan hai - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -int kdump_status[NR_CPUS]; -static atomic_t kdump_cpu_frozen; -atomic_t kdump_in_progress; -static int kdump_freeze_monarch; -static int kdump_on_init = 1; -static int kdump_on_fatal_mca = 1; - -extern void ia64_dump_cpu_regs(void *); - -static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus); - -void -crash_save_this_cpu(void) -{ - void *buf; - unsigned long cfm, sof, sol; - - int cpu = smp_processor_id(); - struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu); - - elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg); - memset(prstatus, 0, sizeof(*prstatus)); - prstatus->common.pr_pid = current->pid; - - ia64_dump_cpu_regs(dst); - cfm = dst[43]; - sol = (cfm >> 7) & 0x7f; - sof = cfm & 0x7f; - dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46], - sof - sol); - - buf = (u64 *) per_cpu_ptr(crash_notes, cpu); - if (!buf) - return; - buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus, - sizeof(*prstatus)); - final_note(buf); -} - -#ifdef CONFIG_SMP -static int -kdump_wait_cpu_freeze(void) -{ - int cpu_num = num_online_cpus() - 1; - int timeout = 1000; - while(timeout-- > 0) { - if (atomic_read(&kdump_cpu_frozen) == cpu_num) - return 0; - udelay(1000); - } - return 1; -} -#endif - -void -machine_crash_shutdown(struct pt_regs *pt) -{ - /* This 
function is only called after the system - * has paniced or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means shooting down the other cpus in - * an SMP system. - */ - kexec_disable_iosapic(); -#ifdef CONFIG_SMP - /* - * If kdump_on_init is set and an INIT is asserted here, kdump will - * be started again via INIT monarch. - */ - local_irq_disable(); - ia64_set_psr_mc(); /* mask MCA/INIT */ - if (atomic_inc_return(&kdump_in_progress) != 1) - unw_init_running(kdump_cpu_freeze, NULL); - - /* - * Now this cpu is ready for kdump. - * Stop all others by IPI or INIT. They could receive INIT from - * outside and might be INIT monarch, but only thing they have to - * do is falling into kdump_cpu_freeze(). - * - * If an INIT is asserted here: - * - All receivers might be slaves, since some of cpus could already - * be frozen and INIT might be masked on monarch. In this case, - * all slaves will be frozen soon since kdump_in_progress will let - * them into DIE_INIT_SLAVE_LEAVE. - * - One might be a monarch, but INIT rendezvous will fail since - * at least this cpu already have INIT masked so it never join - * to the rendezvous. In this case, all slaves and monarch will - * be frozen soon with no wait since the INIT rendezvous is skipped - * by kdump_in_progress. 
- */ - kdump_smp_send_stop(); - /* not all cpu response to IPI, send INIT to freeze them */ - if (kdump_wait_cpu_freeze()) { - kdump_smp_send_init(); - /* wait again, don't go ahead if possible */ - kdump_wait_cpu_freeze(); - } -#endif -} - -static void -machine_kdump_on_init(void) -{ - crash_save_vmcoreinfo(); - local_irq_disable(); - kexec_disable_iosapic(); - machine_kexec(ia64_kimage); -} - -void -kdump_cpu_freeze(struct unw_frame_info *info, void *arg) -{ - int cpuid; - - local_irq_disable(); - cpuid = smp_processor_id(); - crash_save_this_cpu(); - current->thread.ksp = (__u64)info->sw - 16; - - ia64_set_psr_mc(); /* mask MCA/INIT and stop reentrance */ - - atomic_inc(&kdump_cpu_frozen); - kdump_status[cpuid] = 1; - mb(); - for (;;) - cpu_relax(); -} - -static int -kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) -{ - struct ia64_mca_notify_die *nd; - struct die_args *args = data; - - if (atomic_read(&kdump_in_progress)) { - switch (val) { - case DIE_INIT_MONARCH_LEAVE: - if (!kdump_freeze_monarch) - break; - fallthrough; - case DIE_INIT_SLAVE_LEAVE: - case DIE_INIT_MONARCH_ENTER: - case DIE_MCA_RENDZVOUS_LEAVE: - unw_init_running(kdump_cpu_freeze, NULL); - break; - } - } - - if (!kdump_on_init && !kdump_on_fatal_mca) - return NOTIFY_DONE; - - if (!ia64_kimage) { - if (val == DIE_INIT_MONARCH_LEAVE) - ia64_mca_printk(KERN_NOTICE - "%s: kdump not configured\n", - __func__); - return NOTIFY_DONE; - } - - if (val != DIE_INIT_MONARCH_LEAVE && - val != DIE_INIT_MONARCH_PROCESS && - val != DIE_MCA_MONARCH_LEAVE) - return NOTIFY_DONE; - - nd = (struct ia64_mca_notify_die *)args->err; - - switch (val) { - case DIE_INIT_MONARCH_PROCESS: - /* Reason code 1 means machine check rendezvous*/ - if (kdump_on_init && (nd->sos->rv_rc != 1)) { - if (atomic_inc_return(&kdump_in_progress) != 1) - kdump_freeze_monarch = 1; - } - break; - case DIE_INIT_MONARCH_LEAVE: - /* Reason code 1 means machine check rendezvous*/ - if (kdump_on_init && 
(nd->sos->rv_rc != 1)) - machine_kdump_on_init(); - break; - case DIE_MCA_MONARCH_LEAVE: - /* *(nd->data) indicate if MCA is recoverable */ - if (kdump_on_fatal_mca && !(*(nd->data))) { - if (atomic_inc_return(&kdump_in_progress) == 1) - machine_kdump_on_init(); - /* We got fatal MCA while kdump!? No way!! */ - } - break; - } - return NOTIFY_DONE; -} - -#ifdef CONFIG_SYSCTL -static struct ctl_table kdump_ctl_table[] = { - { - .procname = "kdump_on_init", - .data = &kdump_on_init, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "kdump_on_fatal_mca", - .data = &kdump_on_fatal_mca, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; -#endif - -static int -machine_crash_setup(void) -{ - /* be notified before default_monarch_init_process */ - static struct notifier_block kdump_init_notifier_nb = { - .notifier_call = kdump_init_notifier, - .priority = 1, - }; - int ret; - if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) - return ret; -#ifdef CONFIG_SYSCTL - register_sysctl("kernel", kdump_ctl_table); -#endif - return 0; -} - -__initcall(machine_crash_setup); - diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c deleted file mode 100644 index 4ef68e2aa757..000000000000 --- a/arch/ia64/kernel/crash_dump.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * kernel/crash_dump.c - Memory preserving reboot related code. 
- * - * Created by: Simon Horman - * Original code moved from kernel/crash.c - * Original code comment copied from the i386 version of this file - */ - -#include -#include -#include -#include -#include - -ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, - size_t csize, unsigned long offset) -{ - void *vaddr; - - if (!csize) - return 0; - vaddr = __va(pfn< -#include -#include -#include -#include -#include -#include - -/* IBM Summit (EXA) Cyclone counter code*/ -#define CYCLONE_CBAR_ADDR 0xFEB00CD0 -#define CYCLONE_PMCC_OFFSET 0x51A0 -#define CYCLONE_MPMC_OFFSET 0x51D0 -#define CYCLONE_MPCS_OFFSET 0x51A8 -#define CYCLONE_TIMER_FREQ 100000000 - -int use_cyclone; -void __init cyclone_setup(void) -{ - use_cyclone = 1; -} - -static void __iomem *cyclone_mc; - -static u64 read_cyclone(struct clocksource *cs) -{ - return (u64)readq((void __iomem *)cyclone_mc); -} - -static struct clocksource clocksource_cyclone = { - .name = "cyclone", - .rating = 300, - .read = read_cyclone, - .mask = (1LL << 40) - 1, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -int __init init_cyclone_clock(void) -{ - u64 __iomem *reg; - u64 base; /* saved cyclone base address */ - u64 offset; /* offset from pageaddr to cyclone_timer register */ - int i; - u32 __iomem *cyclone_timer; /* Cyclone MPMC0 register */ - - if (!use_cyclone) - return 0; - - printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); - - /* find base address */ - offset = (CYCLONE_CBAR_ADDR); - reg = ioremap(offset, sizeof(u64)); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR" - " register.\n"); - use_cyclone = 0; - return -ENODEV; - } - base = readq(reg); - iounmap(reg); - if(!base){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR" - " value.\n"); - use_cyclone = 0; - return -ENODEV; - } - - /* setup PMCC */ - offset = (base + CYCLONE_PMCC_OFFSET); - reg = ioremap(offset, sizeof(u64)); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid PMCC" - " 
register.\n"); - use_cyclone = 0; - return -ENODEV; - } - writel(0x00000001,reg); - iounmap(reg); - - /* setup MPCS */ - offset = (base + CYCLONE_MPCS_OFFSET); - reg = ioremap(offset, sizeof(u64)); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid MPCS" - " register.\n"); - use_cyclone = 0; - return -ENODEV; - } - writel(0x00000001,reg); - iounmap(reg); - - /* map in cyclone_timer */ - offset = (base + CYCLONE_MPMC_OFFSET); - cyclone_timer = ioremap(offset, sizeof(u32)); - if(!cyclone_timer){ - printk(KERN_ERR "Summit chipset: Could not find valid MPMC" - " register.\n"); - use_cyclone = 0; - return -ENODEV; - } - - /*quick test to make sure its ticking*/ - for(i=0; i<3; i++){ - u32 old = readl(cyclone_timer); - int stall = 100; - while(stall--) barrier(); - if(readl(cyclone_timer) == old){ - printk(KERN_ERR "Summit chipset: Counter not counting!" - " DISABLED\n"); - iounmap(cyclone_timer); - cyclone_timer = NULL; - use_cyclone = 0; - return -ENODEV; - } - } - /* initialize last tick */ - cyclone_mc = cyclone_timer; - clocksource_cyclone.archdata.fsys_mmio = cyclone_timer; - clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ); - - return 0; -} - -__initcall(init_cyclone_clock); diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c deleted file mode 100644 index cd0c166bfbc2..000000000000 --- a/arch/ia64/kernel/dma-mapping.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -/* Set this to 1 if there is a HW IOMMU in the system */ -int iommu_detected __read_mostly; - -const struct dma_map_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c deleted file mode 100644 index 033f5aead88a..000000000000 --- a/arch/ia64/kernel/efi.c +++ /dev/null @@ -1,1360 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Extensible Firmware Interface - * - * Based on Extensible Firmware Interface Specification version 0.9 - * April 30, 1999 - * - 
* Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2003 Hewlett-Packard Co. - * David Mosberger-Tang - * Stephane Eranian - * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. - * Bjorn Helgaas - * - * All EFI Runtime Services are not implemented yet as EFI only - * supports physical mode addressing on SoftSDV. This is to be fixed - * in a future version. --drummond 1999-07-20 - * - * Implemented EFI runtime services and virtual mode calls. --davidm - * - * Goutham Rao: - * Skip non-WB memory and ignore empty memory ranges. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define EFI_DEBUG 0 - -#define ESI_TABLE_GUID \ - EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3, \ - 0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4) - -static unsigned long mps_phys = EFI_INVALID_TABLE_ADDR; -static __initdata unsigned long palo_phys; - -unsigned long __initdata esi_phys = EFI_INVALID_TABLE_ADDR; -unsigned long hcdp_phys = EFI_INVALID_TABLE_ADDR; -unsigned long sal_systab_phys = EFI_INVALID_TABLE_ADDR; - -static const efi_config_table_type_t arch_tables[] __initconst = { - {ESI_TABLE_GUID, &esi_phys, "ESI" }, - {HCDP_TABLE_GUID, &hcdp_phys, "HCDP" }, - {MPS_TABLE_GUID, &mps_phys, "MPS" }, - {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, &palo_phys, "PALO" }, - {SAL_SYSTEM_TABLE_GUID, &sal_systab_phys, "SALsystab" }, - {}, -}; - -extern efi_status_t efi_call_phys (void *, ...); - -static efi_runtime_services_t *runtime; -static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL; - -#define efi_call_virt(f, args...) 
(*(f))(args) - -#define STUB_GET_TIME(prefix, adjust_arg) \ -static efi_status_t \ -prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \ -{ \ - struct ia64_fpreg fr[6]; \ - efi_time_cap_t *atc = NULL; \ - efi_status_t ret; \ - \ - if (tc) \ - atc = adjust_arg(tc); \ - ia64_save_scratch_fpregs(fr); \ - ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), \ - adjust_arg(tm), atc); \ - ia64_load_scratch_fpregs(fr); \ - return ret; \ -} - -#define STUB_SET_TIME(prefix, adjust_arg) \ -static efi_status_t \ -prefix##_set_time (efi_time_t *tm) \ -{ \ - struct ia64_fpreg fr[6]; \ - efi_status_t ret; \ - \ - ia64_save_scratch_fpregs(fr); \ - ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), \ - adjust_arg(tm)); \ - ia64_load_scratch_fpregs(fr); \ - return ret; \ -} - -#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \ -static efi_status_t \ -prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, \ - efi_time_t *tm) \ -{ \ - struct ia64_fpreg fr[6]; \ - efi_status_t ret; \ - \ - ia64_save_scratch_fpregs(fr); \ - ret = efi_call_##prefix( \ - (efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \ - adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \ - ia64_load_scratch_fpregs(fr); \ - return ret; \ -} - -#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \ -static efi_status_t \ -prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \ -{ \ - struct ia64_fpreg fr[6]; \ - efi_time_t *atm = NULL; \ - efi_status_t ret; \ - \ - if (tm) \ - atm = adjust_arg(tm); \ - ia64_save_scratch_fpregs(fr); \ - ret = efi_call_##prefix( \ - (efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \ - enabled, atm); \ - ia64_load_scratch_fpregs(fr); \ - return ret; \ -} - -#define STUB_GET_VARIABLE(prefix, adjust_arg) \ -static efi_status_t \ -prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \ - unsigned long *data_size, void *data) \ -{ \ - struct ia64_fpreg fr[6]; \ - u32 *aattr = NULL; \ - 
efi_status_t ret; \ - \ - if (attr) \ - aattr = adjust_arg(attr); \ - ia64_save_scratch_fpregs(fr); \ - ret = efi_call_##prefix( \ - (efi_get_variable_t *) __va(runtime->get_variable), \ - adjust_arg(name), adjust_arg(vendor), aattr, \ - adjust_arg(data_size), adjust_arg(data)); \ - ia64_load_scratch_fpregs(fr); \ - return ret; \ -} - -#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \ -static efi_status_t \ -prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \ - efi_guid_t *vendor) \ -{ \ - struct ia64_fpreg fr[6]; \ - efi_status_t ret; \ - \ - ia64_save_scratch_fpregs(fr); \ - ret = efi_call_##prefix( \ - (efi_get_next_variable_t *) __va(runtime->get_next_variable), \ - adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \ - ia64_load_scratch_fpregs(fr); \ - return ret; \ -} - -#define STUB_SET_VARIABLE(prefix, adjust_arg) \ -static efi_status_t \ -prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, \ - u32 attr, unsigned long data_size, \ - void *data) \ -{ \ - struct ia64_fpreg fr[6]; \ - efi_status_t ret; \ - \ - ia64_save_scratch_fpregs(fr); \ - ret = efi_call_##prefix( \ - (efi_set_variable_t *) __va(runtime->set_variable), \ - adjust_arg(name), adjust_arg(vendor), attr, data_size, \ - adjust_arg(data)); \ - ia64_load_scratch_fpregs(fr); \ - return ret; \ -} - -#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \ -static efi_status_t \ -prefix##_get_next_high_mono_count (u32 *count) \ -{ \ - struct ia64_fpreg fr[6]; \ - efi_status_t ret; \ - \ - ia64_save_scratch_fpregs(fr); \ - ret = efi_call_##prefix((efi_get_next_high_mono_count_t *) \ - __va(runtime->get_next_high_mono_count), \ - adjust_arg(count)); \ - ia64_load_scratch_fpregs(fr); \ - return ret; \ -} - -#define STUB_RESET_SYSTEM(prefix, adjust_arg) \ -static void \ -prefix##_reset_system (int reset_type, efi_status_t status, \ - unsigned long data_size, efi_char16_t *data) \ -{ \ - struct ia64_fpreg fr[6]; \ - efi_char16_t *adata = NULL; \ - \ - if 
(data) \ - adata = adjust_arg(data); \ - \ - ia64_save_scratch_fpregs(fr); \ - efi_call_##prefix( \ - (efi_reset_system_t *) __va(runtime->reset_system), \ - reset_type, status, data_size, adata); \ - /* should not return, but just in case... */ \ - ia64_load_scratch_fpregs(fr); \ -} - -#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg)) - -STUB_GET_TIME(phys, phys_ptr) -STUB_SET_TIME(phys, phys_ptr) -STUB_GET_WAKEUP_TIME(phys, phys_ptr) -STUB_SET_WAKEUP_TIME(phys, phys_ptr) -STUB_GET_VARIABLE(phys, phys_ptr) -STUB_GET_NEXT_VARIABLE(phys, phys_ptr) -STUB_SET_VARIABLE(phys, phys_ptr) -STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr) -STUB_RESET_SYSTEM(phys, phys_ptr) - -#define id(arg) arg - -STUB_GET_TIME(virt, id) -STUB_SET_TIME(virt, id) -STUB_GET_WAKEUP_TIME(virt, id) -STUB_SET_WAKEUP_TIME(virt, id) -STUB_GET_VARIABLE(virt, id) -STUB_GET_NEXT_VARIABLE(virt, id) -STUB_SET_VARIABLE(virt, id) -STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id) -STUB_RESET_SYSTEM(virt, id) - -void -efi_gettimeofday (struct timespec64 *ts) -{ - efi_time_t tm; - - if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) { - memset(ts, 0, sizeof(*ts)); - return; - } - - ts->tv_sec = mktime64(tm.year, tm.month, tm.day, - tm.hour, tm.minute, tm.second); - ts->tv_nsec = tm.nanosecond; -} - -static int -is_memory_available (efi_memory_desc_t *md) -{ - if (!(md->attribute & EFI_MEMORY_WB)) - return 0; - - switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: - case EFI_BOOT_SERVICES_CODE: - case EFI_BOOT_SERVICES_DATA: - case EFI_CONVENTIONAL_MEMORY: - return 1; - } - return 0; -} - -typedef struct kern_memdesc { - u64 attribute; - u64 start; - u64 num_pages; -} kern_memdesc_t; - -static kern_memdesc_t *kern_memmap; - -#define efi_md_size(md) (md->num_pages << EFI_PAGE_SHIFT) - -static inline u64 -kmd_end(kern_memdesc_t *kmd) -{ - return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT)); -} - -static inline u64 -efi_md_end(efi_memory_desc_t *md) -{ - return (md->phys_addr + efi_md_size(md)); -} - 
-static inline int -efi_wb(efi_memory_desc_t *md) -{ - return (md->attribute & EFI_MEMORY_WB); -} - -static inline int -efi_uc(efi_memory_desc_t *md) -{ - return (md->attribute & EFI_MEMORY_UC); -} - -static void -walk (efi_freemem_callback_t callback, void *arg, u64 attr) -{ - kern_memdesc_t *k; - u64 start, end, voff; - - voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET; - for (k = kern_memmap; k->start != ~0UL; k++) { - if (k->attribute != attr) - continue; - start = PAGE_ALIGN(k->start); - end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK; - if (start < end) - if ((*callback)(start + voff, end + voff, arg) < 0) - return; - } -} - -/* - * Walk the EFI memory map and call CALLBACK once for each EFI memory - * descriptor that has memory that is available for OS use. - */ -void -efi_memmap_walk (efi_freemem_callback_t callback, void *arg) -{ - walk(callback, arg, EFI_MEMORY_WB); -} - -/* - * Walk the EFI memory map and call CALLBACK once for each EFI memory - * descriptor that has memory that is available for uncached allocator. - */ -void -efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg) -{ - walk(callback, arg, EFI_MEMORY_UC); -} - -/* - * Look for the PAL_CODE region reported by EFI and map it using an - * ITR to enable safe PAL calls in virtual mode. 
See IA-64 Processor - * Abstraction Layer chapter 11 in ADAG - */ -void * -efi_get_pal_addr (void) -{ - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; - int pal_code_count = 0; - u64 vaddr, mask; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - if (md->type != EFI_PAL_CODE) - continue; - - if (++pal_code_count > 1) { - printk(KERN_ERR "Too many EFI Pal Code memory ranges, " - "dropped @ %llx\n", md->phys_addr); - continue; - } - /* - * The only ITLB entry in region 7 that is used is the one - * installed by __start(). That entry covers a 64MB range. - */ - mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1); - vaddr = PAGE_OFFSET + md->phys_addr; - - /* - * We must check that the PAL mapping won't overlap with the - * kernel mapping. - * - * PAL code is guaranteed to be aligned on a power of 2 between - * 4k and 256KB and that only one ITR is needed to map it. This - * implies that the PAL code is always aligned on its size, - * i.e., the closest matching page size supported by the TLB. - * Therefore PAL code is guaranteed never to cross a 64MB unless - * it is bigger than 64MB (very unlikely!). So for now the - * following test is enough to determine whether or not we need - * a dedicated ITR for the PAL code. - */ - if ((vaddr & mask) == (KERNEL_START & mask)) { - printk(KERN_INFO "%s: no need to install ITR for PAL code\n", - __func__); - continue; - } - - if (efi_md_size(md) > IA64_GRANULE_SIZE) - panic("Whoa! 
PAL code size bigger than a granule!"); - -#if EFI_DEBUG - mask = ~((1 << IA64_GRANULE_SHIFT) - 1); - - printk(KERN_INFO "CPU %d: mapping PAL code " - "[0x%llx-0x%llx) into [0x%llx-0x%llx)\n", - smp_processor_id(), md->phys_addr, - md->phys_addr + efi_md_size(md), - vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE); -#endif - return __va(md->phys_addr); - } - printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n", - __func__); - return NULL; -} - - -static u8 __init palo_checksum(u8 *buffer, u32 length) -{ - u8 sum = 0; - u8 *end = buffer + length; - - while (buffer < end) - sum = (u8) (sum + *(buffer++)); - - return sum; -} - -/* - * Parse and handle PALO table which is published at: - * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf - */ -static void __init handle_palo(unsigned long phys_addr) -{ - struct palo_table *palo = __va(phys_addr); - u8 checksum; - - if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) { - printk(KERN_INFO "PALO signature incorrect.\n"); - return; - } - - checksum = palo_checksum((u8 *)palo, palo->length); - if (checksum) { - printk(KERN_INFO "PALO checksum incorrect.\n"); - return; - } - - setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO); -} - -void -efi_map_pal_code (void) -{ - void *pal_vaddr = efi_get_pal_addr (); - u64 psr; - - if (!pal_vaddr) - return; - - /* - * Cannot write to CRx with PSR.ic=1 - */ - psr = ia64_clear_ic(); - ia64_itr(0x1, IA64_TR_PALCODE, - GRANULEROUNDDOWN((unsigned long) pal_vaddr), - pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), - IA64_GRANULE_SHIFT); - ia64_set_psr(psr); /* restore psr */ -} - -void __init -efi_init (void) -{ - const efi_system_table_t *efi_systab; - void *efi_map_start, *efi_map_end; - u64 efi_desc_size; - char *cp; - - set_bit(EFI_BOOT, &efi.flags); - set_bit(EFI_64BIT, &efi.flags); - - /* - * It's too early to be able to use the standard kernel command line - * support... 
- */ - for (cp = boot_command_line; *cp; ) { - if (memcmp(cp, "mem=", 4) == 0) { - mem_limit = memparse(cp + 4, &cp); - } else if (memcmp(cp, "max_addr=", 9) == 0) { - max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); - } else if (memcmp(cp, "min_addr=", 9) == 0) { - min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); - } else { - while (*cp != ' ' && *cp) - ++cp; - while (*cp == ' ') - ++cp; - } - } - if (min_addr != 0UL) - printk(KERN_INFO "Ignoring memory below %lluMB\n", - min_addr >> 20); - if (max_addr != ~0UL) - printk(KERN_INFO "Ignoring memory above %lluMB\n", - max_addr >> 20); - - efi_systab = __va(ia64_boot_param->efi_systab); - - /* - * Verify the EFI Table - */ - if (efi_systab == NULL) - panic("Whoa! Can't find EFI system table.\n"); - if (efi_systab_check_header(&efi_systab->hdr)) - panic("Whoa! EFI system table signature incorrect\n"); - - efi_systab_report_header(&efi_systab->hdr, efi_systab->fw_vendor); - - palo_phys = EFI_INVALID_TABLE_ADDR; - - if (efi_config_parse_tables(__va(efi_systab->tables), - efi_systab->nr_tables, - arch_tables) != 0) - return; - - if (palo_phys != EFI_INVALID_TABLE_ADDR) - handle_palo(palo_phys); - - runtime = __va(efi_systab->runtime); - efi.get_time = phys_get_time; - efi.set_time = phys_set_time; - efi.get_wakeup_time = phys_get_wakeup_time; - efi.set_wakeup_time = phys_set_wakeup_time; - efi.get_variable = phys_get_variable; - efi.get_next_variable = phys_get_next_variable; - efi.set_variable = phys_set_variable; - efi.get_next_high_mono_count = phys_get_next_high_mono_count; - efi.reset_system = phys_reset_system; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - -#if EFI_DEBUG - /* print EFI memory map: */ - { - efi_memory_desc_t *md; - void *p; - unsigned int i; - - for (i = 0, p = efi_map_start; p < efi_map_end; - ++i, p += efi_desc_size) - { - const char *unit; - unsigned long size; - 
char buf[64]; - - md = p; - size = md->num_pages << EFI_PAGE_SHIFT; - - if ((size >> 40) > 0) { - size >>= 40; - unit = "TB"; - } else if ((size >> 30) > 0) { - size >>= 30; - unit = "GB"; - } else if ((size >> 20) > 0) { - size >>= 20; - unit = "MB"; - } else { - size >>= 10; - unit = "KB"; - } - - printk("mem%02d: %s " - "range=[0x%016llx-0x%016llx) (%4lu%s)\n", - i, efi_md_typeattr_format(buf, sizeof(buf), md), - md->phys_addr, - md->phys_addr + efi_md_size(md), size, unit); - } - } -#endif - - efi_map_pal_code(); - efi_enter_virtual_mode(); -} - -void -efi_enter_virtual_mode (void) -{ - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - efi_status_t status; - u64 efi_desc_size; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - if (md->attribute & EFI_MEMORY_RUNTIME) { - /* - * Some descriptors have multiple bits set, so the - * order of the tests is relevant. 
- */ - if (md->attribute & EFI_MEMORY_WB) { - md->virt_addr = (u64) __va(md->phys_addr); - } else if (md->attribute & EFI_MEMORY_UC) { - md->virt_addr = (u64) ioremap(md->phys_addr, 0); - } else if (md->attribute & EFI_MEMORY_WC) { -#if 0 - md->virt_addr = ia64_remap(md->phys_addr, - (_PAGE_A | - _PAGE_P | - _PAGE_D | - _PAGE_MA_WC | - _PAGE_PL_0 | - _PAGE_AR_RW)); -#else - printk(KERN_INFO "EFI_MEMORY_WC mapping\n"); - md->virt_addr = (u64) ioremap(md->phys_addr, 0); -#endif - } else if (md->attribute & EFI_MEMORY_WT) { -#if 0 - md->virt_addr = ia64_remap(md->phys_addr, - (_PAGE_A | - _PAGE_P | - _PAGE_D | - _PAGE_MA_WT | - _PAGE_PL_0 | - _PAGE_AR_RW)); -#else - printk(KERN_INFO "EFI_MEMORY_WT mapping\n"); - md->virt_addr = (u64) ioremap(md->phys_addr, 0); -#endif - } - } - } - - status = efi_call_phys(__va(runtime->set_virtual_address_map), - ia64_boot_param->efi_memmap_size, - efi_desc_size, - ia64_boot_param->efi_memdesc_version, - ia64_boot_param->efi_memmap); - if (status != EFI_SUCCESS) { - printk(KERN_WARNING "warning: unable to switch EFI into " - "virtual mode (status=%lu)\n", status); - return; - } - - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - - /* - * Now that EFI is in virtual mode, we call the EFI functions more - * efficiently: - */ - efi.get_time = virt_get_time; - efi.set_time = virt_set_time; - efi.get_wakeup_time = virt_get_wakeup_time; - efi.set_wakeup_time = virt_set_wakeup_time; - efi.get_variable = virt_get_variable; - efi.get_next_variable = virt_get_next_variable; - efi.set_variable = virt_set_variable; - efi.get_next_high_mono_count = virt_get_next_high_mono_count; - efi.reset_system = virt_reset_system; -} - -/* - * Walk the EFI memory map looking for the I/O port range. There can only be - * one entry of this type, other I/O port ranges should be described via ACPI. 
- */ -u64 -efi_get_iobase (void) -{ - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) { - if (md->attribute & EFI_MEMORY_UC) - return md->phys_addr; - } - } - return 0; -} - -static struct kern_memdesc * -kern_memory_descriptor (unsigned long phys_addr) -{ - struct kern_memdesc *md; - - for (md = kern_memmap; md->start != ~0UL; md++) { - if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT)) - return md; - } - return NULL; -} - -static efi_memory_desc_t * -efi_memory_descriptor (unsigned long phys_addr) -{ - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - - if (phys_addr - md->phys_addr < efi_md_size(md)) - return md; - } - return NULL; -} - -static int -efi_memmap_intersects (unsigned long phys_addr, unsigned long size) -{ - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; - unsigned long end; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - end = phys_addr + size; - - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - if (md->phys_addr < end && efi_md_end(md) > phys_addr) - return 1; - } - return 0; -} - -int -efi_mem_type (unsigned long phys_addr) -{ - efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); - - if (md) - return md->type; - return 
-EINVAL; -} - -u64 -efi_mem_attributes (unsigned long phys_addr) -{ - efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); - - if (md) - return md->attribute; - return 0; -} -EXPORT_SYMBOL(efi_mem_attributes); - -u64 -efi_mem_attribute (unsigned long phys_addr, unsigned long size) -{ - unsigned long end = phys_addr + size; - efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); - u64 attr; - - if (!md) - return 0; - - /* - * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells - * the kernel that firmware needs this region mapped. - */ - attr = md->attribute & ~EFI_MEMORY_RUNTIME; - do { - unsigned long md_end = efi_md_end(md); - - if (end <= md_end) - return attr; - - md = efi_memory_descriptor(md_end); - if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr) - return 0; - } while (md); - return 0; /* never reached */ -} - -u64 -kern_mem_attribute (unsigned long phys_addr, unsigned long size) -{ - unsigned long end = phys_addr + size; - struct kern_memdesc *md; - u64 attr; - - /* - * This is a hack for ioremap calls before we set up kern_memmap. - * Maybe we should do efi_memmap_init() earlier instead. - */ - if (!kern_memmap) { - attr = efi_mem_attribute(phys_addr, size); - if (attr & EFI_MEMORY_WB) - return EFI_MEMORY_WB; - return 0; - } - - md = kern_memory_descriptor(phys_addr); - if (!md) - return 0; - - attr = md->attribute; - do { - unsigned long md_end = kmd_end(md); - - if (end <= md_end) - return attr; - - md = kern_memory_descriptor(md_end); - if (!md || md->attribute != attr) - return 0; - } while (md); - return 0; /* never reached */ -} - -int -valid_phys_addr_range (phys_addr_t phys_addr, unsigned long size) -{ - u64 attr; - - /* - * /dev/mem reads and writes use copy_to_user(), which implicitly - * uses a granule-sized kernel identity mapping. It's really - * only safe to do this for regions in kern_memmap. For more - * details, see Documentation/arch/ia64/aliasing.rst. 
- */ - attr = kern_mem_attribute(phys_addr, size); - if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) - return 1; - return 0; -} - -int -valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size) -{ - unsigned long phys_addr = pfn << PAGE_SHIFT; - u64 attr; - - attr = efi_mem_attribute(phys_addr, size); - - /* - * /dev/mem mmap uses normal user pages, so we don't need the entire - * granule, but the entire region we're mapping must support the same - * attribute. - */ - if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) - return 1; - - /* - * Intel firmware doesn't tell us about all the MMIO regions, so - * in general we have to allow mmap requests. But if EFI *does* - * tell us about anything inside this region, we should deny it. - * The user can always map a smaller region to avoid the overlap. - */ - if (efi_memmap_intersects(phys_addr, size)) - return 0; - - return 1; -} - -pgprot_t -phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, - pgprot_t vma_prot) -{ - unsigned long phys_addr = pfn << PAGE_SHIFT; - u64 attr; - - /* - * For /dev/mem mmap, we use user mappings, but if the region is - * in kern_memmap (and hence may be covered by a kernel mapping), - * we must use the same attribute as the kernel mapping. - */ - attr = kern_mem_attribute(phys_addr, size); - if (attr & EFI_MEMORY_WB) - return pgprot_cacheable(vma_prot); - else if (attr & EFI_MEMORY_UC) - return pgprot_noncached(vma_prot); - - /* - * Some chipsets don't support UC access to memory. If - * WB is supported, we prefer that. 
- */ - if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) - return pgprot_cacheable(vma_prot); - - return pgprot_noncached(vma_prot); -} - -int __init -efi_uart_console_only(void) -{ - efi_status_t status; - char *s, name[] = "ConOut"; - efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID; - efi_char16_t *utf16, name_utf16[32]; - unsigned char data[1024]; - unsigned long size = sizeof(data); - struct efi_generic_dev_path *hdr, *end_addr; - int uart = 0; - - /* Convert to UTF-16 */ - utf16 = name_utf16; - s = name; - while (*s) - *utf16++ = *s++ & 0x7f; - *utf16 = 0; - - status = efi.get_variable(name_utf16, &guid, NULL, &size, data); - if (status != EFI_SUCCESS) { - printk(KERN_ERR "No EFI %s variable?\n", name); - return 0; - } - - hdr = (struct efi_generic_dev_path *) data; - end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size); - while (hdr < end_addr) { - if (hdr->type == EFI_DEV_MSG && - hdr->sub_type == EFI_DEV_MSG_UART) - uart = 1; - else if (hdr->type == EFI_DEV_END_PATH || - hdr->type == EFI_DEV_END_PATH2) { - if (!uart) - return 0; - if (hdr->sub_type == EFI_DEV_END_ENTIRE) - return 1; - uart = 0; - } - hdr = (struct efi_generic_dev_path *)((u8 *) hdr + hdr->length); - } - printk(KERN_ERR "Malformed %s value\n", name); - return 0; -} - -/* - * Look for the first granule aligned memory descriptor memory - * that is big enough to hold EFI memory map. 
Make sure this - * descriptor is at least granule sized so it does not get trimmed - */ -struct kern_memdesc * -find_memmap_space (void) -{ - u64 contig_low=0, contig_high=0; - u64 as = 0, ae; - void *efi_map_start, *efi_map_end, *p, *q; - efi_memory_desc_t *md, *pmd = NULL, *check_md; - u64 space_needed, efi_desc_size; - unsigned long total_mem = 0; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - /* - * Worst case: we need 3 kernel descriptors for each efi descriptor - * (if every entry has a WB part in the middle, and UC head and tail), - * plus one for the end marker. - */ - space_needed = sizeof(kern_memdesc_t) * - (3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1); - - for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { - md = p; - if (!efi_wb(md)) { - continue; - } - if (pmd == NULL || !efi_wb(pmd) || - efi_md_end(pmd) != md->phys_addr) { - contig_low = GRANULEROUNDUP(md->phys_addr); - contig_high = efi_md_end(md); - for (q = p + efi_desc_size; q < efi_map_end; - q += efi_desc_size) { - check_md = q; - if (!efi_wb(check_md)) - break; - if (contig_high != check_md->phys_addr) - break; - contig_high = efi_md_end(check_md); - } - contig_high = GRANULEROUNDDOWN(contig_high); - } - if (!is_memory_available(md) || md->type == EFI_LOADER_DATA) - continue; - - /* Round ends inward to granule boundaries */ - as = max(contig_low, md->phys_addr); - ae = min(contig_high, efi_md_end(md)); - - /* keep within max_addr= and min_addr= command line arg */ - as = max(as, min_addr); - ae = min(ae, max_addr); - if (ae <= as) - continue; - - /* avoid going over mem= command line arg */ - if (total_mem + (ae - as) > mem_limit) - ae -= total_mem + (ae - as) - mem_limit; - - if (ae <= as) - continue; - - if (ae - as > space_needed) - break; - } - if (p >= efi_map_end) - panic("Can't allocate space for kernel memory 
descriptors"); - - return __va(as); -} - -/* - * Walk the EFI memory map and gather all memory available for kernel - * to use. We can allocate partial granules only if the unavailable - * parts exist, and are WB. - */ -unsigned long -efi_memmap_init(u64 *s, u64 *e) -{ - struct kern_memdesc *k, *prev = NULL; - u64 contig_low=0, contig_high=0; - u64 as, ae, lim; - void *efi_map_start, *efi_map_end, *p, *q; - efi_memory_desc_t *md, *pmd = NULL, *check_md; - u64 efi_desc_size; - unsigned long total_mem = 0; - - k = kern_memmap = find_memmap_space(); - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { - md = p; - if (!efi_wb(md)) { - if (efi_uc(md) && - (md->type == EFI_CONVENTIONAL_MEMORY || - md->type == EFI_BOOT_SERVICES_DATA)) { - k->attribute = EFI_MEMORY_UC; - k->start = md->phys_addr; - k->num_pages = md->num_pages; - k++; - } - continue; - } - if (pmd == NULL || !efi_wb(pmd) || - efi_md_end(pmd) != md->phys_addr) { - contig_low = GRANULEROUNDUP(md->phys_addr); - contig_high = efi_md_end(md); - for (q = p + efi_desc_size; q < efi_map_end; - q += efi_desc_size) { - check_md = q; - if (!efi_wb(check_md)) - break; - if (contig_high != check_md->phys_addr) - break; - contig_high = efi_md_end(check_md); - } - contig_high = GRANULEROUNDDOWN(contig_high); - } - if (!is_memory_available(md)) - continue; - - /* - * Round ends inward to granule boundaries - * Give trimmings to uncached allocator - */ - if (md->phys_addr < contig_low) { - lim = min(efi_md_end(md), contig_low); - if (efi_uc(md)) { - if (k > kern_memmap && - (k-1)->attribute == EFI_MEMORY_UC && - kmd_end(k-1) == md->phys_addr) { - (k-1)->num_pages += - (lim - md->phys_addr) - >> EFI_PAGE_SHIFT; - } else { - k->attribute = EFI_MEMORY_UC; - k->start = md->phys_addr; - k->num_pages = (lim - md->phys_addr) - >> 
EFI_PAGE_SHIFT; - k++; - } - } - as = contig_low; - } else - as = md->phys_addr; - - if (efi_md_end(md) > contig_high) { - lim = max(md->phys_addr, contig_high); - if (efi_uc(md)) { - if (lim == md->phys_addr && k > kern_memmap && - (k-1)->attribute == EFI_MEMORY_UC && - kmd_end(k-1) == md->phys_addr) { - (k-1)->num_pages += md->num_pages; - } else { - k->attribute = EFI_MEMORY_UC; - k->start = lim; - k->num_pages = (efi_md_end(md) - lim) - >> EFI_PAGE_SHIFT; - k++; - } - } - ae = contig_high; - } else - ae = efi_md_end(md); - - /* keep within max_addr= and min_addr= command line arg */ - as = max(as, min_addr); - ae = min(ae, max_addr); - if (ae <= as) - continue; - - /* avoid going over mem= command line arg */ - if (total_mem + (ae - as) > mem_limit) - ae -= total_mem + (ae - as) - mem_limit; - - if (ae <= as) - continue; - if (prev && kmd_end(prev) == md->phys_addr) { - prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT; - total_mem += ae - as; - continue; - } - k->attribute = EFI_MEMORY_WB; - k->start = as; - k->num_pages = (ae - as) >> EFI_PAGE_SHIFT; - total_mem += ae - as; - prev = k++; - } - k->start = ~0L; /* end-marker */ - - /* reserve the memory we are using for kern_memmap */ - *s = (u64)kern_memmap; - *e = (u64)++k; - - return total_mem; -} - -void -efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource, - struct resource *bss_resource) -{ - struct resource *res; - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; - char *name; - unsigned long flags, desc; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - res = NULL; - - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - - if (md->num_pages == 0) /* should not happen */ - continue; - - flags = IORESOURCE_MEM | IORESOURCE_BUSY; - desc = IORES_DESC_NONE; - - switch (md->type) { - - 
case EFI_MEMORY_MAPPED_IO: - case EFI_MEMORY_MAPPED_IO_PORT_SPACE: - continue; - - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: - case EFI_BOOT_SERVICES_DATA: - case EFI_BOOT_SERVICES_CODE: - case EFI_CONVENTIONAL_MEMORY: - if (md->attribute & EFI_MEMORY_WP) { - name = "System ROM"; - flags |= IORESOURCE_READONLY; - } else if (md->attribute == EFI_MEMORY_UC) { - name = "Uncached RAM"; - } else { - name = "System RAM"; - flags |= IORESOURCE_SYSRAM; - } - break; - - case EFI_ACPI_MEMORY_NVS: - name = "ACPI Non-volatile Storage"; - desc = IORES_DESC_ACPI_NV_STORAGE; - break; - - case EFI_UNUSABLE_MEMORY: - name = "reserved"; - flags |= IORESOURCE_DISABLED; - break; - - case EFI_PERSISTENT_MEMORY: - name = "Persistent Memory"; - desc = IORES_DESC_PERSISTENT_MEMORY; - break; - - case EFI_RESERVED_TYPE: - case EFI_RUNTIME_SERVICES_CODE: - case EFI_RUNTIME_SERVICES_DATA: - case EFI_ACPI_RECLAIM_MEMORY: - default: - name = "reserved"; - break; - } - - if ((res = kzalloc(sizeof(struct resource), - GFP_KERNEL)) == NULL) { - printk(KERN_ERR - "failed to allocate resource for iomem\n"); - return; - } - - res->name = name; - res->start = md->phys_addr; - res->end = md->phys_addr + efi_md_size(md) - 1; - res->flags = flags; - res->desc = desc; - - if (insert_resource(&iomem_resource, res) < 0) - kfree(res); - else { - /* - * We don't know which region contains - * kernel data so we try it repeatedly and - * let the resource manager test it. 
- */ - insert_resource(res, code_resource); - insert_resource(res, data_resource); - insert_resource(res, bss_resource); -#ifdef CONFIG_KEXEC - insert_resource(res, &efi_memmap_res); - insert_resource(res, &boot_param_res); - if (crashk_res.end > crashk_res.start) - insert_resource(res, &crashk_res); -#endif - } - } -} - -#ifdef CONFIG_KEXEC -/* find a block of memory aligned to 64M exclude reserved regions - rsvd_regions are sorted - */ -unsigned long __init -kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n) -{ - int i; - u64 start, end; - u64 alignment = 1UL << _PAGE_SIZE_64M; - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - if (!efi_wb(md)) - continue; - start = ALIGN(md->phys_addr, alignment); - end = efi_md_end(md); - for (i = 0; i < n; i++) { - if (__pa(r[i].start) >= start && __pa(r[i].end) < end) { - if (__pa(r[i].start) > start + size) - return start; - start = ALIGN(__pa(r[i].end), alignment); - if (i < n-1 && - __pa(r[i+1].start) < start + size) - continue; - else - break; - } - } - if (end > start + size) - return start; - } - - printk(KERN_WARNING - "Cannot reserve 0x%lx byte of memory for crashdump\n", size); - return ~0UL; -} -#endif - -#ifdef CONFIG_CRASH_DUMP -/* locate the size find a the descriptor at a certain address */ -unsigned long __init -vmcore_find_descriptor_size (unsigned long address) -{ - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size; - unsigned long ret = 0; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - for (p = efi_map_start; p < efi_map_end; p += 
efi_desc_size) { - md = p; - if (efi_wb(md) && md->type == EFI_LOADER_DATA - && md->phys_addr == address) { - ret = efi_md_size(md); - break; - } - } - - if (ret == 0) - printk(KERN_WARNING "Cannot locate EFI vmcore descriptor\n"); - - return ret; -} -#endif - -char *efi_systab_show_arch(char *str) -{ - if (mps_phys != EFI_INVALID_TABLE_ADDR) - str += sprintf(str, "MPS=0x%lx\n", mps_phys); - if (hcdp_phys != EFI_INVALID_TABLE_ADDR) - str += sprintf(str, "HCDP=0x%lx\n", hcdp_phys); - return str; -} diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S deleted file mode 100644 index 1fd61b78fb29..000000000000 --- a/arch/ia64/kernel/efi_stub.S +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * EFI call stub. - * - * Copyright (C) 1999-2001 Hewlett-Packard Co - * David Mosberger - * - * This stub allows us to make EFI calls in physical mode with interrupts - * turned off. We need this because we can't call SetVirtualMap() until - * the kernel has booted far enough to allow allocation of struct vm_area_struct - * entries (which we would need to map stuff with memory attributes other - * than uncached or writeback...). Since the GetTime() service gets called - * earlier than that, we need to be able to make physical mode EFI calls from - * the kernel. - */ - -/* - * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System - * Abstraction Layer Specification", revision 2.6e). Note that - * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. - * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call - * (the br.ia instruction fails unless psr.dfl and psr.dfh are - * cleared). Fortunately, SAL promises not to touch the floating - * point regs, so at least we don't have to save f2-f127. 
- */ -#define PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) - -#define PSR_BITS_TO_SET \ - (IA64_PSR_BN) - -#include -#include - -/* - * Inputs: - * in0 = address of function descriptor of EFI routine to call - * in1..in7 = arguments to routine - * - * Outputs: - * r8 = EFI_STATUS returned by called function - */ - -GLOBAL_ENTRY(efi_call_phys) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,7,7,0 - ld8 r2=[in0],8 // load EFI function's entry point - mov loc0=rp - .body - ;; - mov loc2=gp // save global pointer - mov loc4=ar.rsc // save RSE configuration - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - ;; - ld8 gp=[in0] // load EFI function's global pointer - movl r16=PSR_BITS_TO_CLEAR - mov loc3=psr // save processor status word - movl r17=PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 - mov b6=r2 - ;; - andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared - br.call.sptk.many rp=ia64_switch_mode_phys -.ret0: mov out4=in5 - mov out0=in1 - mov out1=in2 - mov out2=in3 - mov out3=in4 - mov out5=in6 - mov out6=in7 - mov loc5=r19 - mov loc6=r20 - br.call.sptk.many rp=b6 // call the EFI function -.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 - mov r19=loc5 - mov r20=loc6 - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode -.ret2: mov ar.rsc=loc4 // restore RSE configuration - mov ar.pfs=loc1 - mov rp=loc0 - mov gp=loc2 - br.ret.sptk.many rp -END(efi_call_phys) diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c deleted file mode 100644 index 8895df121540..000000000000 --- a/arch/ia64/kernel/elfcore.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include - -#include - - -Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm) -{ - return GATE_EHDR->e_phnum; -} - -int 
elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) -{ - const struct elf_phdr *const gate_phdrs = - (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); - int i; - Elf64_Off ofs = 0; - - for (i = 0; i < GATE_EHDR->e_phnum; ++i) { - struct elf_phdr phdr = gate_phdrs[i]; - - if (phdr.p_type == PT_LOAD) { - phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); - phdr.p_filesz = phdr.p_memsz; - if (ofs == 0) { - ofs = phdr.p_offset = offset; - offset += phdr.p_filesz; - } else { - phdr.p_offset = ofs; - } - } else { - phdr.p_offset += ofs; - } - phdr.p_paddr = 0; /* match other core phdrs */ - if (!dump_emit(cprm, &phdr, sizeof(phdr))) - return 0; - } - return 1; -} - -int elf_core_write_extra_data(struct coredump_params *cprm) -{ - const struct elf_phdr *const gate_phdrs = - (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); - int i; - - for (i = 0; i < GATE_EHDR->e_phnum; ++i) { - if (gate_phdrs[i].p_type == PT_LOAD) { - void *addr = (void *)gate_phdrs[i].p_vaddr; - size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz); - - if (!dump_emit(cprm, addr, memsz)) - return 0; - break; - } - } - return 1; -} - -size_t elf_core_extra_data_size(struct coredump_params *cprm) -{ - const struct elf_phdr *const gate_phdrs = - (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); - int i; - size_t size = 0; - - for (i = 0; i < GATE_EHDR->e_phnum; ++i) { - if (gate_phdrs[i].p_type == PT_LOAD) { - size += PAGE_ALIGN(gate_phdrs[i].p_memsz); - break; - } - } - return size; -} diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S deleted file mode 100644 index ac06d44b9b27..000000000000 --- a/arch/ia64/kernel/entry.S +++ /dev/null @@ -1,1427 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/ia64/kernel/entry.S - * - * Kernel entry points. 
- * - * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999, 2002-2003 - * Asit Mallick - * Don Dugger - * Suresh Siddha - * Fenghua Yu - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - */ -/* - * ia64_switch_to now places correct virtual mapping in in TR2 for - * kernel stack. This allows us to handle interrupts without changing - * to physical mode. - * - * Jonathan Nicklin - * Patrick O'Rourke - * 11/07/2000 - */ -/* - * Copyright (c) 2008 Isaku Yamahata - * VA Linux Systems Japan K.K. - * pv_ops. - */ -/* - * Global (preserved) predicate usage on syscall entry/exit path: - * - * pKStk: See entry.h. - * pUStk: See entry.h. - * pSys: See entry.h. - * pNonSys: !pSys - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minstate.h" - - /* - * execve() is special because in case of success, we need to - * setup a null register window frame. - */ -ENTRY(ia64_execve) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 - mov loc0=rp - .body - mov out0=in0 // filename - ;; // stop bit between alloc and call - mov out1=in1 // argv - mov out2=in2 // envp - br.call.sptk.many rp=sys_execve -.ret0: - cmp4.ge p6,p7=r8,r0 - mov ar.pfs=loc1 // restore ar.pfs - sxt4 r8=r8 // return 64-bit result - ;; - stf.spill [sp]=f0 - mov rp=loc0 -(p6) mov ar.pfs=r0 // clear ar.pfs on success -(p7) br.ret.sptk.many rp - - /* - * In theory, we'd have to zap this state only to prevent leaking of - * security sensitive state (e.g., if current->mm->dumpable is zero). However, - * this executes in less than 20 cycles even on Itanium, so it's not worth - * optimizing for...). 
- */ - mov ar.unat=0; mov ar.lc=0 - mov r4=0; mov f2=f0; mov b1=r0 - mov r5=0; mov f3=f0; mov b2=r0 - mov r6=0; mov f4=f0; mov b3=r0 - mov r7=0; mov f5=f0; mov b4=r0 - ldf.fill f12=[sp]; mov f13=f0; mov b5=r0 - ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0 - ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0 - ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0 - ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0 - ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0 - ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0 - br.ret.sptk.many rp -END(ia64_execve) - -/* - * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr, - * u64 tls) - */ -GLOBAL_ENTRY(sys_clone2) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc r16=ar.pfs,8,2,6,0 - DO_SAVE_SWITCH_STACK - mov loc0=rp - mov loc1=r16 // save ar.pfs across ia64_clone - .body - mov out0=in0 - mov out1=in1 - mov out2=in2 - mov out3=in3 - mov out4=in4 - mov out5=in5 - br.call.sptk.many rp=ia64_clone -.ret1: .restore sp - adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(sys_clone2) - -/* - * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls) - * Deprecated. Use sys_clone2() instead. 
- */ -GLOBAL_ENTRY(sys_clone) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc r16=ar.pfs,8,2,6,0 - DO_SAVE_SWITCH_STACK - mov loc0=rp - mov loc1=r16 // save ar.pfs across ia64_clone - .body - mov out0=in0 - mov out1=in1 - mov out2=16 // stacksize (compensates for 16-byte scratch area) - mov out3=in3 - mov out4=in4 - mov out5=in5 - br.call.sptk.many rp=ia64_clone -.ret2: .restore sp - adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(sys_clone) - -/* - * prev_task <- ia64_switch_to(struct task_struct *next) - * With Ingo's new scheduler, interrupts are disabled when this routine gets - * called. The code starting at .map relies on this. The rest of the code - * doesn't care about the interrupt masking status. - */ -GLOBAL_ENTRY(ia64_switch_to) - .prologue - alloc r16=ar.pfs,1,0,0,0 - DO_SAVE_SWITCH_STACK - .body - - adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 - movl r25=init_task - mov r27=IA64_KR(CURRENT_STACK) - adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 - dep r20=0,in0,61,3 // physical address of "next" - ;; - st8 [r22]=sp // save kernel stack pointer of old task - shr.u r26=r20,IA64_GRANULE_SHIFT - cmp.eq p7,p6=r25,in0 - ;; - /* - * If we've already mapped this task's page, we can skip doing it again. 
- */ -(p6) cmp.eq p7,p6=r26,r27 -(p6) br.cond.dpnt .map - ;; -.done: - ld8 sp=[r21] // load kernel stack pointer of new task - MOV_TO_KR(CURRENT, in0, r8, r9) // update "current" application register - mov r8=r13 // return pointer to previously running task - mov r13=in0 // set "current" pointer - ;; - DO_LOAD_SWITCH_STACK - -#ifdef CONFIG_SMP - sync.i // ensure "fc"s done by this CPU are visible on other CPUs -#endif - br.ret.sptk.many rp // boogie on out in new context - -.map: - RSM_PSR_IC(r25) // interrupts (psr.i) are already disabled here - movl r25=PAGE_KERNEL - ;; - srlz.d - or r23=r25,r20 // construct PA | page properties - mov r25=IA64_GRANULE_SHIFT<<2 - ;; - MOV_TO_ITIR(p0, r25, r8) - MOV_TO_IFA(in0, r8) // VA of next task... - ;; - mov r25=IA64_TR_CURRENT_STACK - MOV_TO_KR(CURRENT_STACK, r26, r8, r9) // remember last page we mapped... - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - SSM_PSR_IC_AND_SRLZ_D(r8, r9) // reenable the psr.ic bit - br.cond.sptk .done -END(ia64_switch_to) - -/* - * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This - * means that we may get an interrupt with "sp" pointing to the new kernel stack while - * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc, - * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a - * problem. Also, we don't need to specify unwind information for preserved registers - * that are not modified in save_switch_stack as the right unwind information is already - * specified at the call-site of save_switch_stack. 
- */ - -/* - * save_switch_stack: - * - r16 holds ar.pfs - * - b7 holds address to return to - * - rp (b0) holds return address to save - */ -GLOBAL_ENTRY(save_switch_stack) - .prologue - .altrp b7 - flushrs // flush dirty regs to backing store (must be first in insn group) - .save @priunat,r17 - mov r17=ar.unat // preserve caller's - .body -#ifdef CONFIG_ITANIUM - adds r2=16+128,sp - adds r3=16+64,sp - adds r14=SW(R4)+16,sp - ;; - st8.spill [r14]=r4,16 // spill r4 - lfetch.fault.excl.nt1 [r3],128 - ;; - lfetch.fault.excl.nt1 [r2],128 - lfetch.fault.excl.nt1 [r3],128 - ;; - lfetch.fault.excl [r2] - lfetch.fault.excl [r3] - adds r15=SW(R5)+16,sp -#else - add r2=16+3*128,sp - add r3=16,sp - add r14=SW(R4)+16,sp - ;; - st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0 - lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010 - ;; - lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090 - lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190 - ;; - lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110 - lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210 - adds r15=SW(R5)+16,sp -#endif - ;; - st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5 - mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0 - add r2=SW(F2)+16,sp // r2 = &sw->f2 - ;; - st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6 - mov.m r18=ar.fpsr // preserve fpsr - add r3=SW(F3)+16,sp // r3 = &sw->f3 - ;; - stf.spill [r2]=f2,32 - mov.m r19=ar.rnat - mov r21=b0 - - stf.spill [r3]=f3,32 - st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7 - mov r22=b1 - ;; - // since we're done with the spills, read and save ar.unat: - mov.m r29=ar.unat - mov.m r20=ar.bspstore - mov r23=b2 - stf.spill [r2]=f4,32 - stf.spill [r3]=f5,32 - mov r24=b3 - ;; - st8 [r14]=r21,SW(B1)-SW(B0) // save b0 - st8 [r15]=r23,SW(B3)-SW(B2) // save b2 - mov r25=b4 - mov r26=b5 - ;; - st8 [r14]=r22,SW(B4)-SW(B1) // save b1 - st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3 - mov r21=ar.lc // I-unit - stf.spill [r2]=f12,32 - 
stf.spill [r3]=f13,32 - ;; - st8 [r14]=r25,SW(B5)-SW(B4) // save b4 - st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs - stf.spill [r2]=f14,32 - stf.spill [r3]=f15,32 - ;; - st8 [r14]=r26 // save b5 - st8 [r15]=r21 // save ar.lc - stf.spill [r2]=f16,32 - stf.spill [r3]=f17,32 - ;; - stf.spill [r2]=f18,32 - stf.spill [r3]=f19,32 - ;; - stf.spill [r2]=f20,32 - stf.spill [r3]=f21,32 - ;; - stf.spill [r2]=f22,32 - stf.spill [r3]=f23,32 - ;; - stf.spill [r2]=f24,32 - stf.spill [r3]=f25,32 - ;; - stf.spill [r2]=f26,32 - stf.spill [r3]=f27,32 - ;; - stf.spill [r2]=f28,32 - stf.spill [r3]=f29,32 - ;; - stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30) - stf.spill [r3]=f31,SW(PR)-SW(F31) - add r14=SW(CALLER_UNAT)+16,sp - ;; - st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat - st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat - mov r21=pr - ;; - st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat - st8 [r3]=r21 // save predicate registers - ;; - st8 [r2]=r20 // save ar.bspstore - st8 [r14]=r18 // save fpsr - mov ar.rsc=3 // put RSE back into eager mode, pl 0 - br.cond.sptk.many b7 -END(save_switch_stack) - -/* - * load_switch_stack: - * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK) - * - b7 holds address to return to - * - must not touch r8-r11 - */ -GLOBAL_ENTRY(load_switch_stack) - .prologue - .altrp b7 - - .body - lfetch.fault.nt1 [sp] - adds r2=SW(AR_BSPSTORE)+16,sp - adds r3=SW(AR_UNAT)+16,sp - mov ar.rsc=0 // put RSE into enforced lazy mode - adds r14=SW(CALLER_UNAT)+16,sp - adds r15=SW(AR_FPSR)+16,sp - ;; - ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore - ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat - ;; - ld8 r21=[r2],16 // restore b0 - ld8 r22=[r3],16 // restore b1 - ;; - ld8 r23=[r2],16 // restore b2 - ld8 r24=[r3],16 // restore b3 - ;; - ld8 r25=[r2],16 // restore b4 - ld8 r26=[r3],16 // restore b5 - ;; - ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs - ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc - ;; - ld8 r28=[r2] // restore pr 
- ld8 r30=[r3] // restore rnat - ;; - ld8 r18=[r14],16 // restore caller's unat - ld8 r19=[r15],24 // restore fpsr - ;; - ldf.fill f2=[r14],32 - ldf.fill f3=[r15],32 - ;; - ldf.fill f4=[r14],32 - ldf.fill f5=[r15],32 - ;; - ldf.fill f12=[r14],32 - ldf.fill f13=[r15],32 - ;; - ldf.fill f14=[r14],32 - ldf.fill f15=[r15],32 - ;; - ldf.fill f16=[r14],32 - ldf.fill f17=[r15],32 - ;; - ldf.fill f18=[r14],32 - ldf.fill f19=[r15],32 - mov b0=r21 - ;; - ldf.fill f20=[r14],32 - ldf.fill f21=[r15],32 - mov b1=r22 - ;; - ldf.fill f22=[r14],32 - ldf.fill f23=[r15],32 - mov b2=r23 - ;; - mov ar.bspstore=r27 - mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7 - mov b3=r24 - ;; - ldf.fill f24=[r14],32 - ldf.fill f25=[r15],32 - mov b4=r25 - ;; - ldf.fill f26=[r14],32 - ldf.fill f27=[r15],32 - mov b5=r26 - ;; - ldf.fill f28=[r14],32 - ldf.fill f29=[r15],32 - mov ar.pfs=r16 - ;; - ldf.fill f30=[r14],32 - ldf.fill f31=[r15],24 - mov ar.lc=r17 - ;; - ld8.fill r4=[r14],16 - ld8.fill r5=[r15],16 - mov pr=r28,-1 - ;; - ld8.fill r6=[r14],16 - ld8.fill r7=[r15],16 - - mov ar.unat=r18 // restore caller's unat - mov ar.rnat=r30 // must restore after bspstore but before rsc! - mov ar.fpsr=r19 // restore fpsr - mov ar.rsc=3 // put RSE back into eager mode, pl 0 - br.cond.sptk.many b7 -END(load_switch_stack) - - /* - * Invoke a system call, but do some tracing before and after the call. - * We MUST preserve the current register frame throughout this routine - * because some system calls (such as ia64_execve) directly - * manipulate ar.pfs. - */ -GLOBAL_ENTRY(ia64_trace_syscall) - PT_REGS_UNWIND_INFO(0) - /* - * We need to preserve the scratch registers f6-f11 in case the system - * call is sigreturn. 
- */ - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - stf.spill [r16]=f6,32 - stf.spill [r17]=f7,32 - ;; - stf.spill [r16]=f8,32 - stf.spill [r17]=f9,32 - ;; - stf.spill [r16]=f10 - stf.spill [r17]=f11 - br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args - cmp.lt p6,p0=r8,r0 // check tracehook - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - mov r10=0 -(p6) br.cond.sptk strace_error // syscall failed -> - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - ldf.fill f6=[r16],32 - ldf.fill f7=[r17],32 - ;; - ldf.fill f8=[r16],32 - ldf.fill f9=[r17],32 - ;; - ldf.fill f10=[r16] - ldf.fill f11=[r17] - // the syscall number may have changed, so re-load it and re-calculate the - // syscall entry-point: - adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #) - ;; - ld8 r15=[r15] - mov r3=NR_syscalls - 1 - ;; - adds r15=-1024,r15 - movl r16=sys_call_table - ;; - shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) - cmp.leu p6,p7=r15,r3 - ;; -(p6) ld8 r20=[r20] // load address of syscall entry point -(p7) movl r20=sys_ni_syscall - ;; - mov b6=r20 - br.call.sptk.many rp=b6 // do the syscall -.strace_check_retval: - cmp.lt p6,p0=r8,r0 // syscall failed? - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - mov r10=0 -(p6) br.cond.sptk strace_error // syscall failed -> - ;; // avoid RAW on r10 -.strace_save_retval: -.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 -.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 - br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -.ret3: -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -(pUStk) rsm psr.i // disable interrupts - br.cond.sptk ia64_work_pending_syscall_end - -strace_error: - ld8 r3=[r2] // load pt_regs.r8 - sub r9=0,r8 // negate return value to get errno value - ;; - cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? 
- adds r3=16,r2 // r3=&pt_regs.r10 - ;; -(p6) mov r10=-1 -(p6) mov r8=r9 - br.cond.sptk .strace_save_retval -END(ia64_trace_syscall) - - /* - * When traced and returning from sigreturn, we invoke syscall_trace but then - * go straight to ia64_leave_kernel rather than ia64_leave_syscall. - */ -GLOBAL_ENTRY(ia64_strace_leave_kernel) - PT_REGS_UNWIND_INFO(0) -{ /* - * Some versions of gas generate bad unwind info if the first instruction of a - * procedure doesn't go into the first slot of a bundle. This is a workaround. - */ - nop.m 0 - nop.i 0 - br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -} -.ret4: br.cond.sptk ia64_leave_kernel -END(ia64_strace_leave_kernel) - -ENTRY(call_payload) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0) - /* call the kernel_thread payload; fn is in r4, arg - in r5 */ - alloc loc1=ar.pfs,0,3,1,0 - mov loc0=rp - mov loc2=gp - mov out0=r5 // arg - ld8 r14 = [r4], 8 // fn.address - ;; - mov b6 = r14 - ld8 gp = [r4] // fn.gp - ;; - br.call.sptk.many rp=b6 // fn(arg) -.ret12: mov gp=loc2 - mov rp=loc0 - mov ar.pfs=loc1 - /* ... and if it has returned, we are going to userland */ - cmp.ne pKStk,pUStk=r0,r0 - br.ret.sptk.many rp -END(call_payload) - -GLOBAL_ENTRY(ia64_ret_from_clone) - PT_REGS_UNWIND_INFO(0) -{ /* - * Some versions of gas generate bad unwind info if the first instruction of a - * procedure doesn't go into the first slot of a bundle. This is a workaround. - */ - nop.m 0 - nop.i 0 - /* - * We need to call schedule_tail() to complete the scheduling process. - * Called by ia64_switch_to() after ia64_clone()->copy_thread(). r8 contains the - * address of the previously executing task. 
- */ - br.call.sptk.many rp=ia64_invoke_schedule_tail -} -.ret8: -(pKStk) br.call.sptk.many rp=call_payload - adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 r2=[r2] - ;; - mov r8=0 - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 - ;; - cmp.ne p6,p0=r2,r0 -(p6) br.cond.spnt .strace_check_retval - ;; // added stop bits to prevent r8 dependency -END(ia64_ret_from_clone) - // fall through -GLOBAL_ENTRY(ia64_ret_from_syscall) - PT_REGS_UNWIND_INFO(0) - cmp.ge p6,p7=r8,r0 // syscall executed successfully? - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - mov r10=r0 // clear error indication in r10 -(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure -END(ia64_ret_from_syscall) - // fall through - -/* - * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't - * need to switch to bank 0 and doesn't restore the scratch registers. - * To avoid leaking kernel bits, the scratch registers are set to - * the following known-to-be-safe values: - * - * r1: restored (global pointer) - * r2: cleared - * r3: 1 (when returning to user-level) - * r8-r11: restored (syscall return value(s)) - * r12: restored (user-level stack pointer) - * r13: restored (user-level thread pointer) - * r14: set to __kernel_syscall_via_epc - * r15: restored (syscall #) - * r16-r17: cleared - * r18: user-level b6 - * r19: cleared - * r20: user-level ar.fpsr - * r21: user-level b0 - * r22: cleared - * r23: user-level ar.bspstore - * r24: user-level ar.rnat - * r25: user-level ar.unat - * r26: user-level ar.pfs - * r27: user-level ar.rsc - * r28: user-level ip - * r29: user-level psr - * r30: user-level cfm - * r31: user-level pr - * f6-f11: cleared - * pr: restored (user-level pr) - * b0: restored (user-level rp) - * b6: restored - * b7: set to __kernel_syscall_via_epc - * ar.unat: restored (user-level ar.unat) - * ar.pfs: restored (user-level ar.pfs) - * ar.rsc: restored (user-level ar.rsc) - * ar.rnat: restored (user-level ar.rnat) - * ar.bspstore: restored (user-level ar.bspstore) 
- * ar.fpsr: restored (user-level ar.fpsr) - * ar.ccv: cleared - * ar.csd: cleared - * ar.ssd: cleared - */ -GLOBAL_ENTRY(ia64_leave_syscall) - PT_REGS_UNWIND_INFO(0) - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. - */ -#ifdef CONFIG_PREEMPTION - RSM_PSR_I(p0, r2, r18) // disable interrupts - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else /* !CONFIG_PREEMPTION */ - RSM_PSR_I(pUStk, r2, r18) - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.global ia64_work_processed_syscall; -ia64_work_processed_syscall: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - adds r2=PT(LOADRS)+16,r12 - MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave - adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r18] // load current_thread_info()->flags - ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - adds r3=PT(AR_BSPSTORE)+16,r12 // deferred - ;; -#else - adds r2=PT(LOADRS)+16,r12 - adds r3=PT(AR_BSPSTORE)+16,r12 - adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r18] // load current_thread_info()->flags - ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - nop.i 0 - ;; -#endif - mov r16=ar.bsp // M2 get existing backing store pointer - ld8 
r18=[r2],PT(R9)-PT(B6) // load b6 -(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) -(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? -(p6) br.cond.spnt .work_pending_syscall - ;; - // start restoring the state saved on the kernel stack (struct pt_regs): - ld8 r9=[r2],PT(CR_IPSR)-PT(R9) - ld8 r11=[r3],PT(CR_IIP)-PT(R11) -(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! - ;; - invala // M0|1 invalidate ALAT - RSM_PSR_I_IC(r28, r29, r30) // M2 turn off interrupts and interruption collection - cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs - - ld8 r29=[r2],16 // M0|1 load cr.ipsr - ld8 r28=[r3],16 // M0|1 load cr.iip -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 - ;; - ld8 r30=[r2],16 // M0|1 load cr.ifs - ld8 r25=[r3],16 // M0|1 load ar.unat -(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; -#else - mov r22=r0 // A clear r22 - ;; - ld8 r30=[r2],16 // M0|1 load cr.ifs - ld8 r25=[r3],16 // M0|1 load ar.unat -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; -#endif - ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs - MOV_FROM_PSR(pKStk, r22, r21) // M2 read PSR now that interrupts are disabled - nop 0 - ;; - ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 - ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc - mov f6=f0 // F clear f6 - ;; - ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage) - ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates - mov f7=f0 // F clear f7 - ;; - ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr - ld8.fill r1=[r3],16 // M0|1 load r1 -(pUStk) mov r17=1 // A - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) st1 [r15]=r17 // M2|3 -#else -(pUStk) st1 [r14]=r17 // M2|3 -#endif - ld8.fill r13=[r3],16 // M0|1 - mov f8=f0 // F clear f8 - ;; - ld8.fill r12=[r2] // M0|1 restore r12 (sp) - 
ld8.fill r15=[r3] // M0|1 restore r15 - mov b6=r18 // I0 restore b6 - - LOAD_PHYS_STACK_REG_SIZE(r17) - mov f9=f0 // F clear f9 -(pKStk) br.cond.dpnt.many skip_rbs_switch // B - - srlz.d // M0 ensure interruption collection is off (for cover) - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition - COVER // B add current frame into dirty partition & set cr.ifs - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - mov r19=ar.bsp // M2 get new backing store pointer - st8 [r14]=r22 // M save time at leave - mov f10=f0 // F clear f10 - - mov r22=r0 // A clear r22 - movl r14=__kernel_syscall_via_epc // X - ;; -#else - mov r19=ar.bsp // M2 get new backing store pointer - mov f10=f0 // F clear f10 - - nop.m 0 - movl r14=__kernel_syscall_via_epc // X - ;; -#endif - mov.m ar.csd=r0 // M2 clear ar.csd - mov.m ar.ccv=r0 // M2 clear ar.ccv - mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) - - mov.m ar.ssd=r0 // M2 clear ar.ssd - mov f11=f0 // F clear f11 - br.cond.sptk.many rbs_switch // B -END(ia64_leave_syscall) - -GLOBAL_ENTRY(ia64_leave_kernel) - PT_REGS_UNWIND_INFO(0) - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. 
- */ -#ifdef CONFIG_PREEMPTION - RSM_PSR_I(p0, r17, r31) // disable interrupts - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else - RSM_PSR_I(pUStk, r17, r31) - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.work_processed_kernel: - adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r17] // load current_thread_info()->flags - adds r21=PT(PR)+16,r12 - ;; - - lfetch [r21],PT(CR_IPSR)-PT(PR) - adds r2=PT(B6)+16,r12 - adds r3=PT(R16)+16,r12 - ;; - lfetch [r21] - ld8 r28=[r2],8 // load b6 - adds r29=PT(R24)+16,r12 - - ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) - adds r30=PT(AR_CCV)+16,r12 -(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8.fill r24=[r29] - ld8 r15=[r30] // load ar.ccv -(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? 
- ;; - ld8 r29=[r2],16 // load b7 - ld8 r30=[r3],16 // load ar.csd -(p6) br.cond.spnt .work_pending - ;; - ld8 r31=[r2],16 // load ar.ssd - ld8.fill r8=[r3],16 - ;; - ld8.fill r9=[r2],16 - ld8.fill r10=[r3],PT(R17)-PT(R10) - ;; - ld8.fill r11=[r2],PT(R18)-PT(R11) - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; - RSM_PSR_I_IC(r23, r22, r25) // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - ld8.fill r22=[r2],24 - ld8.fill r23=[r3],24 - mov b6=r28 - ;; - ld8.fill r25=[r2],16 - ld8.fill r26=[r3],16 - mov b7=r29 - ;; - ld8.fill r27=[r2],16 - ld8.fill r28=[r3],16 - ;; - ld8.fill r29=[r2],16 - ld8.fill r30=[r3],24 - ;; - ld8.fill r31=[r2],PT(F9)-PT(R31) - adds r3=PT(F10)-PT(F6),r3 - ;; - ldf.fill f9=[r2],PT(F6)-PT(F9) - ldf.fill f10=[r3],PT(F8)-PT(F10) - ;; - ldf.fill f6=[r2],PT(F7)-PT(F6) - ;; - ldf.fill f7=[r2],PT(F11)-PT(F7) - ldf.fill f8=[r3],32 - ;; - srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned) - mov ar.ccv=r15 - ;; - ldf.fill f11=[r2] - BSW_0(r2, r3, r15) // switch back to bank 0 (no stop bit required beforehand...) 
- ;; -(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) - adds r16=PT(CR_IPSR)+16,r12 - adds r17=PT(CR_IIP)+16,r12 - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - .pred.rel.mutex pUStk,pKStk - MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled - MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave - nop.i 0 - ;; -#else - MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled - nop.i 0 - nop.i 0 - ;; -#endif - ld8 r29=[r16],16 // load cr.ipsr - ld8 r28=[r17],16 // load cr.iip - ;; - ld8 r30=[r16],16 // load cr.ifs - ld8 r25=[r17],16 // load ar.unat - ;; - ld8 r26=[r16],16 // load ar.pfs - ld8 r27=[r17],16 // load ar.rsc - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs - ;; - ld8 r24=[r16],16 // load ar.rnat (may be garbage) - ld8 r23=[r17],16 // load ar.bspstore (may be garbage) - ;; - ld8 r31=[r16],16 // load predicates - ld8 r21=[r17],16 // load b0 - ;; - ld8 r19=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 // load r1 - ;; - ld8.fill r12=[r16],16 - ld8.fill r13=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 -#else -(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 -#endif - ;; - ld8 r20=[r16],16 // ar.fpsr - ld8.fill r15=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred -#endif - ;; - ld8.fill r14=[r16],16 - ld8.fill r2=[r17] -(pUStk) mov r17=1 - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; - // mib : mov add br -> mib : ld8 add br - // bbb_ : br nop cover;; mbb_ : mov br cover;; - // - // no one require bsp in r16 if (pKStk) branch is selected. 
-(pUStk) st8 [r3]=r22 // save time at leave -(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack - shr.u r18=r19,16 // get byte size of existing "dirty" partition - ;; - ld8.fill r3=[r16] // deferred - LOAD_PHYS_STACK_REG_SIZE(r17) -(pKStk) br.cond.dpnt skip_rbs_switch - mov r16=ar.bsp // get existing backing store pointer -#else - ld8.fill r3=[r16] -(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack - shr.u r18=r19,16 // get byte size of existing "dirty" partition - ;; - mov r16=ar.bsp // get existing backing store pointer - LOAD_PHYS_STACK_REG_SIZE(r17) -(pKStk) br.cond.dpnt skip_rbs_switch -#endif - - /* - * Restore user backing store. - * - * NOTE: alloc, loadrs, and cover can't be predicated. - */ -(pNonSys) br.cond.dpnt dont_preserve_current_frame - COVER // add current frame into dirty partition and set cr.ifs - ;; - mov r19=ar.bsp // get new backing store pointer -rbs_switch: - sub r16=r16,r18 // krbs = old bsp - size of dirty partition - cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs - ;; - sub r19=r19,r16 // calculate total byte size of dirty partition - add r18=64,r18 // don't force in0-in7 into memory... - ;; - shl r19=r19,16 // shift size of dirty partition into loadrs position - ;; -dont_preserve_current_frame: - /* - * To prevent leaking bits between the kernel and user-space, - * we must clear the stacked registers in the "invalid" partition here. - * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium, - * 5 registers/cycle on McKinley). 
- */ -# define pRecurse p6 -# define pReturn p7 -#ifdef CONFIG_ITANIUM -# define Nregs 10 -#else -# define Nregs 14 -#endif - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r19 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r17 - mov in1=0 - ;; - TEXT_ALIGN(32) -rse_clear_invalid: -#ifdef CONFIG_ITANIUM - // cycle 0 - { .mii - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 -}{ .mfb - add out1=1,in1 // increment recursion count - nop.f 0 - nop.b 0 // can't do br.call here because of alloc (WAW on CFM) - ;; -}{ .mfi // cycle 1 - mov loc1=0 - nop.f 0 - mov loc2=0 -}{ .mib - mov loc3=0 - mov loc4=0 -(pRecurse) br.call.sptk.many b0=rse_clear_invalid - -}{ .mfi // cycle 2 - mov loc5=0 - nop.f 0 - cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret -}{ .mib - mov loc6=0 - mov loc7=0 -(pReturn) br.ret.sptk.many b0 -} -#else /* !CONFIG_ITANIUM */ - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 -(pRecurse) br.call.dptk.few b0=rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 -(pReturn) br.ret.dptk.many b0 -#endif /* !CONFIG_ITANIUM */ -# undef pRecurse -# undef pReturn - ;; - alloc r17=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; -skip_rbs_switch: - mov ar.unat=r25 // M2 -(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22 -(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise - ;; -(pUStk) mov ar.bspstore=r23 // M2 -(pKStk) dep 
r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp -(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise - ;; - MOV_TO_IPSR(p0, r29, r25) // M2 - mov ar.pfs=r26 // I0 -(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise - - MOV_TO_IFS(p9, r30, r25)// M2 - mov b0=r21 // I0 -(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise - - mov ar.fpsr=r20 // M2 - MOV_TO_IIP(r28, r25) // M2 - nop 0 - ;; -(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode - nop 0 -(pLvSys)mov r2=r0 - - mov ar.rsc=r27 // M2 - mov pr=r31,-1 // I0 - RFI // B - - /* - * On entry: - * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPTION) - * r31 = current->thread_info->flags - * On exit: - * p6 = TRUE if work-pending-check needs to be redone - * - * Interrupts are disabled on entry, reenabled depend on work, and - * disabled on exit. - */ -.work_pending_syscall: - add r2=-8,r2 - add r3=-8,r3 - ;; - st8 [r2]=r8 - st8 [r3]=r10 -.work_pending: - tbit.z p6,p0=r31,TIF_NEED_RESCHED // is resched not needed? -(p6) br.cond.sptk.few .notify - br.call.spnt.many rp=preempt_schedule_irq -.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check) -(pLvSys)br.cond.sptk.few ia64_work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel - -.notify: -(pUStk) br.call.spnt.many rp=notify_resume_user -.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check) -(pLvSys)br.cond.sptk.few ia64_work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel - -.global ia64_work_pending_syscall_end; -ia64_work_pending_syscall_end: - adds r2=PT(R8)+16,r12 - adds r3=PT(R10)+16,r12 - ;; - ld8 r8=[r2] - ld8 r10=[r3] - br.cond.sptk.many ia64_work_processed_syscall -END(ia64_leave_kernel) - -ENTRY(handle_syscall_error) - /* - * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could - * lead us to mistake a negative return value as a failed syscall. Those syscall - * must deposit a non-zero value in pt_regs.r8 to indicate an error.
If - * pt_regs.r8 is zero, we assume that the call completed successfully. - */ - PT_REGS_UNWIND_INFO(0) - ld8 r3=[r2] // load pt_regs.r8 - ;; - cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? - ;; -(p7) mov r10=-1 -(p7) sub r8=0,r8 // negate return value to get errno - br.cond.sptk ia64_leave_syscall -END(handle_syscall_error) - - /* - * Invoke schedule_tail(task) while preserving in0-in7, which may be needed - * in case a system call gets restarted. - */ -GLOBAL_ENTRY(ia64_invoke_schedule_tail) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,1,0 - mov loc0=rp - mov out0=r8 // Address of previous task - ;; - br.call.sptk.many rp=schedule_tail -.ret11: mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(ia64_invoke_schedule_tail) - - /* - * Setup stack and call do_notify_resume_user(), keeping interrupts - * disabled. - * - * Note that pSys and pNonSys need to be set up by the caller. - * We declare 8 input registers so the system call args get preserved, - * in case we need to restart a system call. - */ -GLOBAL_ENTRY(notify_resume_user) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! 
- mov r9=ar.unat - mov loc0=rp // save return address - mov out0=0 // there is no "oldset" - adds out1=8,sp // out1=&sigscratch->ar_pfs -(pSys) mov out2=1 // out2==1 => we're in a syscall - ;; -(pNonSys) mov out2=0 // out2==0 => not a syscall - .fframe 16 - .spillsp ar.unat, 16 - st8 [sp]=r9,-16 // allocate space for ar.unat and save it - st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch - .body - br.call.sptk.many rp=do_notify_resume_user -.ret15: .restore sp - adds sp=16,sp // pop scratch stack space - ;; - ld8 r9=[sp] // load new unat from sigscratch->scratch_unat - mov rp=loc0 - ;; - mov ar.unat=r9 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(notify_resume_user) - -ENTRY(sys_rt_sigreturn) - PT_REGS_UNWIND_INFO(0) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - alloc r2=ar.pfs,8,0,1,0 - .prologue - PT_REGS_SAVES(16) - adds sp=-16,sp - .body - cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall... - ;; - /* - * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined - * syscall-entry path does not save them we save them here instead. Note: we - * don't need to save any other registers that are not saved by the stream-lined - * syscall path, because restore_sigcontext() restores them. 
- */ - adds r16=PT(F6)+32,sp - adds r17=PT(F7)+32,sp - ;; - stf.spill [r16]=f6,32 - stf.spill [r17]=f7,32 - ;; - stf.spill [r16]=f8,32 - stf.spill [r17]=f9,32 - ;; - stf.spill [r16]=f10 - stf.spill [r17]=f11 - adds out0=16,sp // out0 = &sigscratch - br.call.sptk.many rp=ia64_rt_sigreturn -.ret19: .restore sp,0 - adds sp=16,sp - ;; - ld8 r9=[sp] // load new ar.unat - mov.sptk b7=r8,ia64_leave_kernel - ;; - mov ar.unat=r9 - br.many b7 -END(sys_rt_sigreturn) - -GLOBAL_ENTRY(ia64_prepare_handle_unaligned) - .prologue - /* - * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 - */ - mov r16=r0 - DO_SAVE_SWITCH_STACK - br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt -.ret21: .body - DO_LOAD_SWITCH_STACK - br.cond.sptk.many rp // goes to ia64_leave_kernel -END(ia64_prepare_handle_unaligned) - - // - // unw_init_running(void (*callback)(info, arg), void *arg) - // -# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15) - -GLOBAL_ENTRY(unw_init_running) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - alloc loc1=ar.pfs,2,3,3,0 - ;; - ld8 loc2=[in0],8 - mov loc0=rp - mov r16=loc1 - DO_SAVE_SWITCH_STACK - .body - - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE - SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE) - adds sp=-EXTRA_FRAME_SIZE,sp - .body - ;; - adds out0=16,sp // &info - mov out1=r13 // current - adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack - br.call.sptk.many rp=unw_init_frame_info -1: adds out0=16,sp // &info - mov b6=loc2 - mov loc2=gp // save gp across indirect function call - ;; - ld8 gp=[in0] - mov out1=in1 // arg - br.call.sptk.many rp=b6 // invoke the callback function -1: mov gp=loc2 // restore gp - - // For now, we don't allow changing registers from within - // unw_init_running; if we ever want to allow that, we'd - // have to do a load_switch_stack here: - .restore sp - adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp - - 
mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(unw_init_running) -EXPORT_SYMBOL(unw_init_running) - -#ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE -GLOBAL_ENTRY(_mcount) - br ftrace_stub -END(_mcount) -EXPORT_SYMBOL(_mcount) - -.here: - br.ret.sptk.many b0 - -GLOBAL_ENTRY(ftrace_caller) - alloc out0 = ar.pfs, 8, 0, 4, 0 - mov out3 = r0 - ;; - mov out2 = b0 - add r3 = 0x20, r3 - mov out1 = r1; - br.call.sptk.many b0 = ftrace_patch_gp - //this might be called from module, so we must patch gp -ftrace_patch_gp: - movl gp=__gp - mov b0 = r3 - ;; -.global ftrace_call; -ftrace_call: -{ - .mlx - nop.m 0x0 - movl r3 = .here;; -} - alloc loc0 = ar.pfs, 4, 4, 2, 0 - ;; - mov loc1 = b0 - mov out0 = b0 - mov loc2 = r8 - mov loc3 = r15 - ;; - adds out0 = -MCOUNT_INSN_SIZE, out0 - mov out1 = in2 - mov b6 = r3 - - br.call.sptk.many b0 = b6 - ;; - mov ar.pfs = loc0 - mov b0 = loc1 - mov r8 = loc2 - mov r15 = loc3 - br ftrace_stub - ;; -END(ftrace_caller) - -#else -GLOBAL_ENTRY(_mcount) - movl r2 = ftrace_stub - movl r3 = ftrace_trace_function;; - ld8 r3 = [r3];; - ld8 r3 = [r3];; - cmp.eq p7,p0 = r2, r3 -(p7) br.sptk.many ftrace_stub - ;; - - alloc loc0 = ar.pfs, 4, 4, 2, 0 - ;; - mov loc1 = b0 - mov out0 = b0 - mov loc2 = r8 - mov loc3 = r15 - ;; - adds out0 = -MCOUNT_INSN_SIZE, out0 - mov out1 = in2 - mov b6 = r3 - - br.call.sptk.many b0 = b6 - ;; - mov ar.pfs = loc0 - mov b0 = loc1 - mov r8 = loc2 - mov r15 = loc3 - br ftrace_stub - ;; -END(_mcount) -#endif - -GLOBAL_ENTRY(ftrace_stub) - mov r3 = b0 - movl r2 = _mcount_ret_helper - ;; - mov b6 = r2 - mov b7 = r3 - br.ret.sptk.many b6 - -_mcount_ret_helper: - mov b0 = r42 - mov r1 = r41 - mov ar.pfs = r40 - br b7 -END(ftrace_stub) - -#endif /* CONFIG_FUNCTION_TRACER */ - -#define __SYSCALL(nr, entry) data8 entry - .rodata - .align 8 - .globl sys_call_table -sys_call_table: -#include <asm/syscall_table.h> diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h deleted file mode 100644 index 6463dc316263..000000000000
--- a/arch/ia64/kernel/entry.h +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* - * Preserved registers that are shared between code in ivt.S and - * entry.S. Be careful not to step on these! - */ -#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */ -#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */ -#define PRED_USER_STACK 3 /* returning to user-stacks? */ -#define PRED_SYSCALL 4 /* inside a system call? */ -#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */ - -#ifdef __ASSEMBLY__ -# define PASTE2(x,y) x##y -# define PASTE(x,y) PASTE2(x,y) - -# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL) -# define pKStk PASTE(p,PRED_KERNEL_STACK) -# define pUStk PASTE(p,PRED_USER_STACK) -# define pSys PASTE(p,PRED_SYSCALL) -# define pNonSys PASTE(p,PRED_NON_SYSCALL) -#endif - -#define PT(f) (IA64_PT_REGS_##f##_OFFSET) -#define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET) -#define SOS(f) (IA64_SAL_OS_STATE_##f##_OFFSET) - -#define PT_REGS_SAVES(off) \ - .unwabi 3, 'i'; \ - .fframe IA64_PT_REGS_SIZE+16+(off); \ - .spillsp rp, PT(CR_IIP)+16+(off); \ - .spillsp ar.pfs, PT(CR_IFS)+16+(off); \ - .spillsp ar.unat, PT(AR_UNAT)+16+(off); \ - .spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \ - .spillsp pr, PT(PR)+16+(off); - -#define PT_REGS_UNWIND_INFO(off) \ - .prologue; \ - PT_REGS_SAVES(off); \ - .body - -#define SWITCH_STACK_SAVES(off) \ - .savesp ar.unat,SW(CALLER_UNAT)+16+(off); \ - .savesp ar.fpsr,SW(AR_FPSR)+16+(off); \ - .spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \ - .spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \ - .spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \ - .spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \ - .spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \ - .spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \ - .spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \ - .spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \ - 
.spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \ - .spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \ - .spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \ - .spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \ - .spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \ - .spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \ - .spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \ - .spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \ - .spillsp @priunat,SW(AR_UNAT)+16+(off); \ - .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \ - .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \ - .spillsp pr,SW(PR)+16+(off) - -#define DO_SAVE_SWITCH_STACK \ - movl r28=1f; \ - ;; \ - .fframe IA64_SWITCH_STACK_SIZE; \ - adds sp=-IA64_SWITCH_STACK_SIZE,sp; \ - mov.ret.sptk b7=r28,1f; \ - SWITCH_STACK_SAVES(0); \ - br.cond.sptk.many save_switch_stack; \ -1: - -#define DO_LOAD_SWITCH_STACK \ - movl r28=1f; \ - ;; \ - invala; \ - mov.ret.sptk b7=r28,1f; \ - br.cond.sptk.many load_switch_stack; \ -1: .restore sp; \ - adds sp=IA64_SWITCH_STACK_SIZE,sp diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c deleted file mode 100644 index dd5bfed52031..000000000000 --- a/arch/ia64/kernel/err_inject.c +++ /dev/null @@ -1,273 +0,0 @@ -/* - * err_inject.c - - * 1.) Inject errors to a processor. - * 2.) Query error injection capabilities. - * This driver along with user space code can be acting as an error - * injection tool. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. 
See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Written by: Fenghua Yu , Intel Corporation - * Copyright (C) 2006, Intel Corp. All rights reserved. - * - */ -#include -#include -#include -#include -#include - -#define ERR_INJ_DEBUG - -#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte; - -#define define_one_ro(name) \ -static DEVICE_ATTR(name, 0444, show_##name, NULL) - -#define define_one_rw(name) \ -static DEVICE_ATTR(name, 0644, show_##name, store_##name) - -static u64 call_start[NR_CPUS]; -static u64 phys_addr[NR_CPUS]; -static u64 err_type_info[NR_CPUS]; -static u64 err_struct_info[NR_CPUS]; -static struct { - u64 data1; - u64 data2; - u64 data3; -} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS]; -static s64 status[NR_CPUS]; -static u64 capabilities[NR_CPUS]; -static u64 resources[NR_CPUS]; - -#define show(name) \ -static ssize_t \ -show_##name(struct device *dev, struct device_attribute *attr, \ - char *buf) \ -{ \ - u32 cpu=dev->id; \ - return sprintf(buf, "%llx\n", name[cpu]); \ -} - -#define store(name) \ -static ssize_t \ -store_##name(struct device *dev, struct device_attribute *attr, \ - const char *buf, size_t size) \ -{ \ - unsigned int cpu=dev->id; \ - name[cpu] = simple_strtoull(buf, NULL, 16); \ - return size; \ -} - -show(call_start) - -/* It's user's responsibility to call the PAL procedure on a specific - * processor. The cpu number in driver is only used for storing data. 
- */ -static ssize_t -store_call_start(struct device *dev, struct device_attribute *attr, - const char *buf, size_t size) -{ - unsigned int cpu=dev->id; - unsigned long call_start = simple_strtoull(buf, NULL, 16); - -#ifdef ERR_INJ_DEBUG - printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu); - printk(KERN_DEBUG "err_type_info=%llx,\n", err_type_info[cpu]); - printk(KERN_DEBUG "err_struct_info=%llx,\n", err_struct_info[cpu]); - printk(KERN_DEBUG "err_data_buffer=%llx, %llx, %llx.\n", - err_data_buffer[cpu].data1, - err_data_buffer[cpu].data2, - err_data_buffer[cpu].data3); -#endif - switch (call_start) { - case 0: /* Do nothing. */ - break; - case 1: /* Call pal_mc_error_inject in physical mode. */ - status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu], - err_struct_info[cpu], - ia64_tpa(&err_data_buffer[cpu]), - &capabilities[cpu], - &resources[cpu]); - break; - case 2: /* Call pal_mc_error_inject in virtual mode. */ - status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu], - err_struct_info[cpu], - ia64_tpa(&err_data_buffer[cpu]), - &capabilities[cpu], - &resources[cpu]); - break; - default: - status[cpu] = -EINVAL; - break; - } - -#ifdef ERR_INJ_DEBUG - printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]); - printk(KERN_DEBUG "capabilities=%llx,\n", capabilities[cpu]); - printk(KERN_DEBUG "resources=%llx\n", resources[cpu]); -#endif - return size; -} - -show(err_type_info) -store(err_type_info) - -static ssize_t -show_virtual_to_phys(struct device *dev, struct device_attribute *attr, - char *buf) -{ - unsigned int cpu=dev->id; - return sprintf(buf, "%llx\n", phys_addr[cpu]); -} - -static ssize_t -store_virtual_to_phys(struct device *dev, struct device_attribute *attr, - const char *buf, size_t size) -{ - unsigned int cpu=dev->id; - u64 virt_addr=simple_strtoull(buf, NULL, 16); - int ret; - - ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL); - if (ret<=0) { -#ifdef ERR_INJ_DEBUG - printk("Virtual address %llx is not 
existing.\n", virt_addr); -#endif - return -EINVAL; - } - - phys_addr[cpu] = ia64_tpa(virt_addr); - return size; -} - -show(err_struct_info) -store(err_struct_info) - -static ssize_t -show_err_data_buffer(struct device *dev, - struct device_attribute *attr, char *buf) -{ - unsigned int cpu=dev->id; - - return sprintf(buf, "%llx, %llx, %llx\n", - err_data_buffer[cpu].data1, - err_data_buffer[cpu].data2, - err_data_buffer[cpu].data3); -} - -static ssize_t -store_err_data_buffer(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - unsigned int cpu=dev->id; - int ret; - -#ifdef ERR_INJ_DEBUG - printk("write err_data_buffer=[%llx,%llx,%llx] on cpu%d\n", - err_data_buffer[cpu].data1, - err_data_buffer[cpu].data2, - err_data_buffer[cpu].data3, - cpu); -#endif - ret = sscanf(buf, "%llx, %llx, %llx", - &err_data_buffer[cpu].data1, - &err_data_buffer[cpu].data2, - &err_data_buffer[cpu].data3); - if (ret!=ERR_DATA_BUFFER_SIZE) - return -EINVAL; - - return size; -} - -show(status) -show(capabilities) -show(resources) - -define_one_rw(call_start); -define_one_rw(err_type_info); -define_one_rw(err_struct_info); -define_one_rw(err_data_buffer); -define_one_rw(virtual_to_phys); -define_one_ro(status); -define_one_ro(capabilities); -define_one_ro(resources); - -static struct attribute *default_attrs[] = { - &dev_attr_call_start.attr, - &dev_attr_virtual_to_phys.attr, - &dev_attr_err_type_info.attr, - &dev_attr_err_struct_info.attr, - &dev_attr_err_data_buffer.attr, - &dev_attr_status.attr, - &dev_attr_capabilities.attr, - &dev_attr_resources.attr, - NULL -}; - -static struct attribute_group err_inject_attr_group = { - .attrs = default_attrs, - .name = "err_inject" -}; -/* Add/Remove err_inject interface for CPU device */ -static int err_inject_add_dev(unsigned int cpu) -{ - struct device *sys_dev = get_cpu_device(cpu); - - return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group); -} - -static int err_inject_remove_dev(unsigned int cpu) 
-{ - struct device *sys_dev = get_cpu_device(cpu); - - sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); - return 0; -} - -static enum cpuhp_state hp_online; - -static int __init err_inject_init(void) -{ - int ret; -#ifdef ERR_INJ_DEBUG - printk(KERN_INFO "Enter error injection driver.\n"); -#endif - - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/err_inj:online", - err_inject_add_dev, err_inject_remove_dev); - if (ret >= 0) { - hp_online = ret; - ret = 0; - } - return ret; -} - -static void __exit err_inject_exit(void) -{ -#ifdef ERR_INJ_DEBUG - printk(KERN_INFO "Exit error injection driver.\n"); -#endif - cpuhp_remove_state(hp_online); -} - -module_init(err_inject_init); -module_exit(err_inject_exit); - -MODULE_AUTHOR("Fenghua Yu "); -MODULE_DESCRIPTION("MC error injection kernel sysfs interface"); -MODULE_LICENSE("GPL"); diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c deleted file mode 100644 index 4df57c93e0a8..000000000000 --- a/arch/ia64/kernel/esi.c +++ /dev/null @@ -1,193 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Extensible SAL Interface (ESI) support routines. 
- * - * Copyright (C) 2006 Hewlett-Packard Co - * Alex Williamson - */ -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Alex Williamson "); -MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support"); -MODULE_LICENSE("GPL"); - -#define MODULE_NAME "esi" - -enum esi_systab_entry_type { - ESI_DESC_ENTRY_POINT = 0 -}; - -/* - * Entry type: Size: - * 0 48 - */ -#define ESI_DESC_SIZE(type) "\060"[(unsigned) (type)] - -typedef struct ia64_esi_desc_entry_point { - u8 type; - u8 reserved1[15]; - u64 esi_proc; - u64 gp; - efi_guid_t guid; -} ia64_esi_desc_entry_point_t; - -struct pdesc { - void *addr; - void *gp; -}; - -static struct ia64_sal_systab *esi_systab; - -extern unsigned long esi_phys; - -static int __init esi_init (void) -{ - struct ia64_sal_systab *systab; - char *p; - int i; - - if (esi_phys == EFI_INVALID_TABLE_ADDR) - return -ENODEV; - - systab = __va(esi_phys); - - if (strncmp(systab->signature, "ESIT", 4) != 0) { - printk(KERN_ERR "bad signature in ESI system table!"); - return -ENODEV; - } - - p = (char *) (systab + 1); - for (i = 0; i < systab->entry_count; i++) { - /* - * The first byte of each entry type contains the type - * descriptor. 
- */ - switch (*p) { - case ESI_DESC_ENTRY_POINT: - break; - default: - printk(KERN_WARNING "Unknown table type %d found in " - "ESI table, ignoring rest of table\n", *p); - return -ENODEV; - } - - p += ESI_DESC_SIZE(*p); - } - - esi_systab = systab; - return 0; -} - - -int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp, - enum esi_proc_type proc_type, u64 func, - u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, - u64 arg7) -{ - struct ia64_fpreg fr[6]; - unsigned long flags = 0; - int i; - char *p; - - if (!esi_systab) - return -1; - - p = (char *) (esi_systab + 1); - for (i = 0; i < esi_systab->entry_count; i++) { - if (*p == ESI_DESC_ENTRY_POINT) { - ia64_esi_desc_entry_point_t *esi = (void *)p; - if (!efi_guidcmp(guid, esi->guid)) { - ia64_sal_handler esi_proc; - struct pdesc pdesc; - - pdesc.addr = __va(esi->esi_proc); - pdesc.gp = __va(esi->gp); - - esi_proc = (ia64_sal_handler) &pdesc; - - ia64_save_scratch_fpregs(fr); - if (proc_type == ESI_PROC_SERIALIZED) - spin_lock_irqsave(&sal_lock, flags); - else if (proc_type == ESI_PROC_MP_SAFE) - local_irq_save(flags); - else - preempt_disable(); - *isrvp = (*esi_proc)(func, arg1, arg2, arg3, - arg4, arg5, arg6, arg7); - if (proc_type == ESI_PROC_SERIALIZED) - spin_unlock_irqrestore(&sal_lock, - flags); - else if (proc_type == ESI_PROC_MP_SAFE) - local_irq_restore(flags); - else - preempt_enable(); - ia64_load_scratch_fpregs(fr); - return 0; - } - } - p += ESI_DESC_SIZE(*p); - } - return -1; -} -EXPORT_SYMBOL_GPL(ia64_esi_call); - -int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp, - u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4, - u64 arg5, u64 arg6, u64 arg7) -{ - struct ia64_fpreg fr[6]; - unsigned long flags; - u64 esi_params[8]; - char *p; - int i; - - if (!esi_systab) - return -1; - - p = (char *) (esi_systab + 1); - for (i = 0; i < esi_systab->entry_count; i++) { - if (*p == ESI_DESC_ENTRY_POINT) { - ia64_esi_desc_entry_point_t *esi = (void *)p; - if 
(!efi_guidcmp(guid, esi->guid)) { - ia64_sal_handler esi_proc; - struct pdesc pdesc; - - pdesc.addr = (void *)esi->esi_proc; - pdesc.gp = (void *)esi->gp; - - esi_proc = (ia64_sal_handler) &pdesc; - - esi_params[0] = func; - esi_params[1] = arg1; - esi_params[2] = arg2; - esi_params[3] = arg3; - esi_params[4] = arg4; - esi_params[5] = arg5; - esi_params[6] = arg6; - esi_params[7] = arg7; - ia64_save_scratch_fpregs(fr); - spin_lock_irqsave(&sal_lock, flags); - *isrvp = esi_call_phys(esi_proc, esi_params); - spin_unlock_irqrestore(&sal_lock, flags); - ia64_load_scratch_fpregs(fr); - return 0; - } - } - p += ESI_DESC_SIZE(*p); - } - return -1; -} -EXPORT_SYMBOL_GPL(ia64_esi_call_phys); - -static void __exit esi_exit (void) -{ -} - -module_init(esi_init); -module_exit(esi_exit); /* makes module removable... */ diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S deleted file mode 100644 index 9928c5b2957c..000000000000 --- a/arch/ia64/kernel/esi_stub.S +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ESI call stub. - * - * Copyright (C) 2005 Hewlett-Packard Co - * Alex Williamson - * - * Based on EFI call stub by David Mosberger. The stub is virtually - * identical to the one for EFI phys-mode calls, except that ESI - * calls may have up to 8 arguments, so they get passed to this routine - * through memory. - * - * This stub allows us to make ESI calls in physical mode with interrupts - * turned off. ESI calls may not support calling from virtual mode. - * - * Google for "Extensible SAL specification" for a document describing the - * ESI standard. - */ - -/* - * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System - * Abstraction Layer Specification", revision 2.6e). Note that - * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. - * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call - * (the br.ia instruction fails unless psr.dfl and psr.dfh are - * cleared). 
Fortunately, SAL promises not to touch the floating - * point regs, so at least we don't have to save f2-f127. - */ -#define PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) - -#define PSR_BITS_TO_SET \ - (IA64_PSR_BN) - -#include -#include -#include - -/* - * Inputs: - * in0 = address of function descriptor of ESI routine to call - * in1 = address of array of ESI parameters - * - * Outputs: - * r8 = result returned by called function - */ -GLOBAL_ENTRY(esi_call_phys) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - alloc loc1=ar.pfs,2,7,8,0 - ld8 r2=[in0],8 // load ESI function's entry point - mov loc0=rp - .body - ;; - ld8 out0=[in1],8 // ESI params loaded from array - ;; // passing all as inputs doesn't work - ld8 out1=[in1],8 - ;; - ld8 out2=[in1],8 - ;; - ld8 out3=[in1],8 - ;; - ld8 out4=[in1],8 - ;; - ld8 out5=[in1],8 - ;; - ld8 out6=[in1],8 - ;; - ld8 out7=[in1] - mov loc2=gp // save global pointer - mov loc4=ar.rsc // save RSE configuration - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - ;; - ld8 gp=[in0] // load ESI function's global pointer - movl r16=PSR_BITS_TO_CLEAR - mov loc3=psr // save processor status word - movl r17=PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 - mov b6=r2 - ;; - andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared - br.call.sptk.many rp=ia64_switch_mode_phys -.ret0: mov loc5=r19 // old ar.bsp - mov loc6=r20 // old sp - br.call.sptk.many rp=b6 // call the ESI function -.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 // save virtual mode psr - mov r19=loc5 // save virtual mode bspstore - mov r20=loc6 // save virtual mode sp - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode -.ret2: mov ar.rsc=loc4 // restore RSE configuration - mov ar.pfs=loc1 - mov rp=loc0 - mov gp=loc2 - br.ret.sptk.many rp -END(esi_call_phys) 
-EXPORT_SYMBOL_GPL(esi_call_phys) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S deleted file mode 100644 index cc4733e9990a..000000000000 --- a/arch/ia64/kernel/fsys.S +++ /dev/null @@ -1,837 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file contains the light-weight system call handlers (fsyscall-handlers). - * - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * 25-Sep-03 davidm Implement fsys_rt_sigprocmask(). - * 18-Feb-03 louisk Implement fsys_gettimeofday(). - * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more, - * probably broke it along the way... ;-) - * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make - * it capable of using memory based clocks without falling back to C code. - * 08-Feb-07 Fenghua Yu Implement fsys_getcpu. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "entry.h" -#include - -/* - * See Documentation/arch/ia64/fsys.rst for details on fsyscalls. 
- * - * On entry to an fsyscall handler: - * r10 = 0 (i.e., defaults to "successful syscall return") - * r11 = saved ar.pfs (a user-level value) - * r15 = system call number - * r16 = "current" task pointer (in normal kernel-mode, this is in r13) - * r32-r39 = system call arguments - * b6 = return address (a user-level value) - * ar.pfs = previous frame-state (a user-level value) - * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) - * all other registers may contain values passed in from user-mode - * - * On return from an fsyscall handler: - * r11 = saved ar.pfs (as passed into the fsyscall handler) - * r15 = system call number (as passed into the fsyscall handler) - * r32-r39 = system call arguments (as passed into the fsyscall handler) - * b6 = return address (as passed into the fsyscall handler) - * ar.pfs = previous frame-state (as passed into the fsyscall handler) - */ - -ENTRY(fsys_ni_syscall) - .prologue - .altrp b6 - .body - mov r8=ENOSYS - mov r10=-1 - FSYS_RETURN -END(fsys_ni_syscall) - -ENTRY(fsys_getpid) - .prologue - .altrp b6 - .body - add r17=IA64_TASK_SIGNAL_OFFSET,r16 - ;; - ld8 r17=[r17] // r17 = current->signal - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - ld4 r9=[r9] - add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17 - ;; - and r9=TIF_ALLWORK_MASK,r9 - ld8 r17=[r17] // r17 = current->signal->pids[PIDTYPE_TGID] - ;; - add r8=IA64_PID_LEVEL_OFFSET,r17 - ;; - ld4 r8=[r8] // r8 = pid->level - add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] - ;; - shl r8=r8,IA64_UPID_SHIFT - ;; - add r17=r17,r8 // r17 = &pid->numbers[pid->level] - ;; - ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr - ;; - mov r17=0 - ;; - cmp.ne p8,p0=0,r9 -(p8) br.spnt.many fsys_fallback_syscall - FSYS_RETURN -END(fsys_getpid) - -ENTRY(fsys_set_tid_address) - .prologue - .altrp b6 - .body - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - add r17=IA64_TASK_THREAD_PID_OFFSET,r16 - ;; - ld4 r9=[r9] - tnat.z p6,p7=r32 // check argument register for being NaT - ld8 
r17=[r17] // r17 = current->thread_pid - ;; - and r9=TIF_ALLWORK_MASK,r9 - add r8=IA64_PID_LEVEL_OFFSET,r17 - add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 - ;; - ld4 r8=[r8] // r8 = pid->level - add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] - ;; - shl r8=r8,IA64_UPID_SHIFT - ;; - add r17=r17,r8 // r17 = &pid->numbers[pid->level] - ;; - ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr - ;; - cmp.ne p8,p0=0,r9 - mov r17=-1 - ;; -(p6) st8 [r18]=r32 -(p7) st8 [r18]=r17 -(p8) br.spnt.many fsys_fallback_syscall - ;; - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... - FSYS_RETURN -END(fsys_set_tid_address) - -#if IA64_GTOD_SEQ_OFFSET !=0 -#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t -#endif -#if IA64_ITC_JITTER_OFFSET !=0 -#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t -#endif -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_DIVIDE_BY_1000 0x4000 -#define CLOCK_ADD_MONOTONIC 0x8000 - -ENTRY(fsys_gettimeofday) - .prologue - .altrp b6 - .body - mov r31 = r32 - tnat.nz p6,p0 = r33 // guard against NaT argument -(p6) br.cond.spnt.few .fail_einval - mov r30 = CLOCK_DIVIDE_BY_1000 - ;; -.gettime: - // Register map - // Incoming r31 = pointer to address where to place result - // r30 = flags determining how time is processed - // r2,r3 = temp r4-r7 preserved - // r8 = result nanoseconds - // r9 = result seconds - // r10 = temporary storage for clock difference - // r11 = preserved: saved ar.pfs - // r12 = preserved: memory stack - // r13 = preserved: thread pointer - // r14 = address of mask / mask value - // r15 = preserved: system call number - // r16 = preserved: current task pointer - // r17 = (not used) - // r18 = (not used) - // r19 = address of itc_lastcycle - // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence) - // r21 = address of mmio_ptr - // r22 = address of wall_time or monotonic_time - // r23 = address of 
shift / value - // r24 = address mult factor / cycle_last value - // r25 = itc_lastcycle value - // r26 = address clocksource cycle_last - // r27 = (not used) - // r28 = sequence number at the beginning of critical section - // r29 = address of itc_jitter - // r30 = time processing flags / memory address - // r31 = pointer to result - // Predicates - // p6,p7 short term use - // p8 = timesource ar.itc - // p9 = timesource mmio64 - // p10 = timesource mmio32 - not used - // p11 = timesource not to be handled by asm code - // p12 = memory time source ( = p9 | p10) - not used - // p13 = do cmpxchg with itc_lastcycle - // p14 = Divide by 1000 - // p15 = Add monotonic - // - // Note that instructions are optimized for McKinley. McKinley can - // process two bundles simultaneously and therefore we continuously - // try to feed the CPU two bundles and then a stop. - - add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 - tnat.nz p6,p0 = r31 // guard against Nat argument -(p6) br.cond.spnt.few .fail_einval - movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address - ;; - ld4 r2 = [r2] // process work pending flags - movl r29 = itc_jitter_data // itc_jitter - add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time - add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 - mov pr = r30,0xc000 // Set predicates according to function - ;; - and r2 = TIF_ALLWORK_MASK,r2 - add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 -(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time - ;; - add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last - cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled -(p6) br.cond.spnt.many fsys_fallback_syscall - ;; - // Begin critical section -.time_redo: - ld4.acq r28 = [r20] // gtod_lock.sequence, Must take first - ;; - and r28 = ~1,r28 // And make sequence even to force retry if odd - ;; - ld8 r30 = [r21] // clocksource->mmio_ptr - add r24 = IA64_CLKSRC_MULT_OFFSET,r20 - ld4 r2 = [r29] // itc_jitter value - add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20 - add r14 
= IA64_CLKSRC_MASK_OFFSET,r20 - ;; - ld4 r3 = [r24] // clocksource mult value - ld8 r14 = [r14] // clocksource mask value - cmp.eq p8,p9 = 0,r30 // use cpu timer if no mmio_ptr - ;; - setf.sig f7 = r3 // Setup for mult scaling of counter -(p8) cmp.ne p13,p0 = r2,r0 // need itc_jitter compensation, set p13 - ld4 r23 = [r23] // clocksource shift value - ld8 r24 = [r26] // get clksrc_cycle_last value -(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control - ;; - .pred.rel.mutex p8,p9 - MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!! -(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. -(p13) ld8 r25 = [r19] // get itc_lastcycle value - ld8 r9 = [r22],IA64_TIME_SN_SPEC_SNSEC_OFFSET // sec - ;; - ld8 r8 = [r22],-IA64_TIME_SN_SPEC_SNSEC_OFFSET // snsec -(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) - ;; -(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared - sub r10 = r2,r24 // current_cycle - last_cycle - ;; -(p6) sub r10 = r25,r24 // time we got was less than last_cycle -(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg - ;; -(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv - ;; -(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful - ;; -(p7) sub r10 = r3,r24 // then use new last_cycle instead - ;; - and r10 = r10,r14 // Apply mask - ;; - setf.sig f8 = r10 - nop.i 123 - ;; - // fault check takes 5 cycles and we have spare time -EX(.fail_efault, probe.w.fault r31, 3) - xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) - ;; - getf.sig r2 = f8 - mf - ;; - ld4 r10 = [r20] // gtod_lock.sequence - add r8 = r8,r2 // Add xtime.nsecs - ;; - shr.u r8 = r8,r23 // shift by factor - cmp4.ne p7,p0 = r28,r10 -(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo - // End critical section. 
- // Now r8=tv->tv_nsec and r9=tv->tv_sec - mov r10 = r0 - movl r2 = 1000000000 - add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 -(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack - ;; -.time_normalize: - mov r21 = r8 - cmp.ge p6,p0 = r8,r2 -(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time - ;; -(p14) setf.sig f8 = r20 -(p6) sub r8 = r8,r2 -(p6) add r9 = 1,r9 // two nops before the branch. -(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod -(p6) br.cond.dpnt.few .time_normalize - ;; - // Divided by 8 though shift. Now divide by 125 - // The compiler was able to do that with a multiply - // and a shift and we do the same -EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles -(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it - ;; -(p14) getf.sig r2 = f8 - ;; - mov r8 = r0 -(p14) shr.u r21 = r2, 4 - ;; -EX(.fail_efault, st8 [r31] = r9) -EX(.fail_efault, st8 [r23] = r21) - FSYS_RETURN -.fail_einval: - mov r8 = EINVAL - mov r10 = -1 - FSYS_RETURN -.fail_efault: - mov r8 = EFAULT - mov r10 = -1 - FSYS_RETURN -END(fsys_gettimeofday) - -ENTRY(fsys_clock_gettime) - .prologue - .altrp b6 - .body - cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 - // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC -(p6) br.spnt.few fsys_fallback_syscall - mov r31 = r33 - shl r30 = r32,15 - br.many .gettime -END(fsys_clock_gettime) - -/* - * fsys_getcpu doesn't use the third parameter in this implementation. It reads - * current_thread_info()->cpu and corresponding node in cpu_to_node_map. 
- */ -ENTRY(fsys_getcpu) - .prologue - .altrp b6 - .body - ;; - add r2=TI_FLAGS+IA64_TASK_SIZE,r16 - tnat.nz p6,p0 = r32 // guard against NaT argument - add r3=TI_CPU+IA64_TASK_SIZE,r16 - ;; - ld4 r3=[r3] // M r3 = thread_info->cpu - ld4 r2=[r2] // M r2 = thread_info->flags -(p6) br.cond.spnt.few .fail_einval // B - ;; - tnat.nz p7,p0 = r33 // I guard against NaT argument -(p7) br.cond.spnt.few .fail_einval // B - ;; - cmp.ne p6,p0=r32,r0 - cmp.ne p7,p0=r33,r0 - ;; -#ifdef CONFIG_NUMA - movl r17=cpu_to_node_map - ;; -EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles -EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles - shladd r18=r3,1,r17 - ;; - ld2 r20=[r18] // r20 = cpu_to_node_map[cpu] - and r2 = TIF_ALLWORK_MASK,r2 - ;; - cmp.ne p8,p0=0,r2 -(p8) br.spnt.many fsys_fallback_syscall - ;; - ;; -EX(.fail_efault, (p6) st4 [r32] = r3) -EX(.fail_efault, (p7) st2 [r33] = r20) - mov r8=0 - ;; -#else -EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles -EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles - and r2 = TIF_ALLWORK_MASK,r2 - ;; - cmp.ne p8,p0=0,r2 -(p8) br.spnt.many fsys_fallback_syscall - ;; -EX(.fail_efault, (p6) st4 [r32] = r3) -EX(.fail_efault, (p7) st2 [r33] = r0) - mov r8=0 - ;; -#endif - FSYS_RETURN -END(fsys_getcpu) - -ENTRY(fsys_fallback_syscall) - .prologue - .altrp b6 - .body - /* - * We only get here from light-weight syscall handlers. Thus, we already - * know that r15 contains a valid syscall number. No need to re-check. 
- */ - adds r17=-1024,r15 - movl r14=sys_call_table - ;; - RSM_PSR_I(p0, r26, r27) - shladd r18=r17,3,r14 - ;; - ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point - MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency) - mov r27=ar.rsc - mov r21=ar.fpsr - mov r26=ar.pfs -END(fsys_fallback_syscall) - /* FALL THROUGH */ -GLOBAL_ENTRY(fsys_bubble_down) - .prologue - .altrp b6 - .body - /* - * We get here for syscalls that don't have a lightweight - * handler. For those, we need to bubble down into the kernel - * and that requires setting up a minimal pt_regs structure, - * and initializing the CPU state more or less as if an - * interruption had occurred. To make syscall-restarts work, - * we setup pt_regs such that cr_iip points to the second - * instruction in syscall_via_break. Decrementing the IP - * hence will restart the syscall via break and not - * decrementing IP will return us to the caller, as usual. - * Note that we preserve the value of psr.pp rather than - * initializing it from dcr.pp. This makes it possible to - * distinguish fsyscall execution from other privileged - * execution. - * - * On entry: - * - normal fsyscall handler register usage, except - * that we also have: - * - r18: address of syscall entry point - * - r21: ar.fpsr - * - r26: ar.pfs - * - r27: ar.rsc - * - r29: psr - * - * We used to clear some PSR bits here but that requires slow - * serialization. Fortunately, that isn't really necessary. - * The rationale is as follows: we used to clear bits - * ~PSR_PRESERVED_BITS in PSR.L. Since - * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we - * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. 
- * However, - * - * PSR.BE : already is turned off in __kernel_syscall_via_epc() - * PSR.AC : don't care (kernel normally turns PSR.AC on) - * PSR.I : already turned off by the time fsys_bubble_down gets - * invoked - * PSR.DFL: always 0 (kernel never turns it on) - * PSR.DFH: don't care --- kernel never touches f32-f127 on its own - * initiative - * PSR.DI : always 0 (kernel never turns it on) - * PSR.SI : always 0 (kernel never turns it on) - * PSR.DB : don't care --- kernel never enables kernel-level - * breakpoints - * PSR.TB : must be 0 already; if it wasn't zero on entry to - * __kernel_syscall_via_epc, the branch to fsys_bubble_down - * will trigger a taken branch; the taken-trap-handler then - * converts the syscall into a break-based system-call. - */ - /* - * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. - * The rest we have to synthesize. - */ -# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \ - | (0x1 << IA64_PSR_RI_BIT) \ - | IA64_PSR_BN | IA64_PSR_I) - - invala // M0|1 - movl r14=ia64_ret_from_syscall // X - - nop.m 0 - movl r28=__kernel_syscall_via_break // X create cr.iip - ;; - - mov r2=r16 // A get task addr to addl-addressable register - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A - mov r31=pr // I0 save pr (2 cyc) - ;; - st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag - addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS - add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A - ;; - ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags - lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store - nop.i 0 - ;; - mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting -#else - nop.m 0 -#endif - nop.i 0 - ;; - mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore - mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!) 
- nop.i 0 - ;; - mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS - movl r8=PSR_ONE_BITS // X - ;; - mov r25=ar.unat // M2 (5 cyc) save ar.unat - mov r19=b6 // I0 save b6 (2 cyc) - mov r20=r1 // A save caller's gp in r20 - ;; - or r29=r8,r29 // A construct cr.ipsr value to save - mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc) - addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack - - mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc) - cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 - br.call.sptk.many b7=ia64_syscall_setup // B - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - // mov.m r30=ar.itc is called in advance - add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 - add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 - ;; - ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel - ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel - ;; - ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime - ld8 r21=[r17] // cumulated utime - sub r22=r19,r18 // stime before leave kernel - ;; - st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp - sub r18=r30,r19 // elapsed time in user mode - ;; - add r20=r20,r22 // sum stime - add r21=r21,r18 // sum utime - ;; - st8 [r16]=r20 // update stime - st8 [r17]=r21 // update utime - ;; -#endif - mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 - mov rp=r14 // I0 set the real return addr - and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A - ;; - SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs - cmp.eq p8,p0=r3,r0 // A -(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT - - nop.m 0 -(p8) br.call.sptk.many b6=b6 // B (ignore return address) - br.cond.spnt ia64_trace_syscall // B -END(fsys_bubble_down) - - .rodata - .align 8 - .globl fsyscall_table - - data8 fsys_bubble_down -fsyscall_table: - data8 fsys_ni_syscall - data8 0 // exit // 1025 - data8 0 // read - data8 0 // write - data8 0 // open - data8 0 // close - data8 0 // 
creat // 1030 - data8 0 // link - data8 0 // unlink - data8 0 // execve - data8 0 // chdir - data8 0 // fchdir // 1035 - data8 0 // utimes - data8 0 // mknod - data8 0 // chmod - data8 0 // chown - data8 0 // lseek // 1040 - data8 fsys_getpid // getpid - data8 0 // getppid - data8 0 // mount - data8 0 // umount - data8 0 // setuid // 1045 - data8 0 // getuid - data8 0 // geteuid - data8 0 // ptrace - data8 0 // access - data8 0 // sync // 1050 - data8 0 // fsync - data8 0 // fdatasync - data8 0 // kill - data8 0 // rename - data8 0 // mkdir // 1055 - data8 0 // rmdir - data8 0 // dup - data8 0 // pipe - data8 0 // times - data8 0 // brk // 1060 - data8 0 // setgid - data8 0 // getgid - data8 0 // getegid - data8 0 // acct - data8 0 // ioctl // 1065 - data8 0 // fcntl - data8 0 // umask - data8 0 // chroot - data8 0 // ustat - data8 0 // dup2 // 1070 - data8 0 // setreuid - data8 0 // setregid - data8 0 // getresuid - data8 0 // setresuid - data8 0 // getresgid // 1075 - data8 0 // setresgid - data8 0 // getgroups - data8 0 // setgroups - data8 0 // getpgid - data8 0 // setpgid // 1080 - data8 0 // setsid - data8 0 // getsid - data8 0 // sethostname - data8 0 // setrlimit - data8 0 // getrlimit // 1085 - data8 0 // getrusage - data8 fsys_gettimeofday // gettimeofday - data8 0 // settimeofday - data8 0 // select - data8 0 // poll // 1090 - data8 0 // symlink - data8 0 // readlink - data8 0 // uselib - data8 0 // swapon - data8 0 // swapoff // 1095 - data8 0 // reboot - data8 0 // truncate - data8 0 // ftruncate - data8 0 // fchmod - data8 0 // fchown // 1100 - data8 0 // getpriority - data8 0 // setpriority - data8 0 // statfs - data8 0 // fstatfs - data8 0 // gettid // 1105 - data8 0 // semget - data8 0 // semop - data8 0 // semctl - data8 0 // msgget - data8 0 // msgsnd // 1110 - data8 0 // msgrcv - data8 0 // msgctl - data8 0 // shmget - data8 0 // shmat - data8 0 // shmdt // 1115 - data8 0 // shmctl - data8 0 // syslog - data8 0 // setitimer - data8 0 // 
getitimer - data8 0 // 1120 - data8 0 - data8 0 - data8 0 // vhangup - data8 0 // lchown - data8 0 // remap_file_pages // 1125 - data8 0 // wait4 - data8 0 // sysinfo - data8 0 // clone - data8 0 // setdomainname - data8 0 // newuname // 1130 - data8 0 // adjtimex - data8 0 - data8 0 // init_module - data8 0 // delete_module - data8 0 // 1135 - data8 0 - data8 0 // quotactl - data8 0 // bdflush - data8 0 // sysfs - data8 0 // personality // 1140 - data8 0 // afs_syscall - data8 0 // setfsuid - data8 0 // setfsgid - data8 0 // getdents - data8 0 // flock // 1145 - data8 0 // readv - data8 0 // writev - data8 0 // pread64 - data8 0 // pwrite64 - data8 0 // sysctl // 1150 - data8 0 // mmap - data8 0 // munmap - data8 0 // mlock - data8 0 // mlockall - data8 0 // mprotect // 1155 - data8 0 // mremap - data8 0 // msync - data8 0 // munlock - data8 0 // munlockall - data8 0 // sched_getparam // 1160 - data8 0 // sched_setparam - data8 0 // sched_getscheduler - data8 0 // sched_setscheduler - data8 0 // sched_yield - data8 0 // sched_get_priority_max // 1165 - data8 0 // sched_get_priority_min - data8 0 // sched_rr_get_interval - data8 0 // nanosleep - data8 0 // nfsservctl - data8 0 // prctl // 1170 - data8 0 // getpagesize - data8 0 // mmap2 - data8 0 // pciconfig_read - data8 0 // pciconfig_write - data8 0 // perfmonctl // 1175 - data8 0 // sigaltstack - data8 0 // rt_sigaction - data8 0 // rt_sigpending - data8 0 // rt_sigprocmask - data8 0 // rt_sigqueueinfo // 1180 - data8 0 // rt_sigreturn - data8 0 // rt_sigsuspend - data8 0 // rt_sigtimedwait - data8 0 // getcwd - data8 0 // capget // 1185 - data8 0 // capset - data8 0 // sendfile - data8 0 - data8 0 - data8 0 // socket // 1190 - data8 0 // bind - data8 0 // connect - data8 0 // listen - data8 0 // accept - data8 0 // getsockname // 1195 - data8 0 // getpeername - data8 0 // socketpair - data8 0 // send - data8 0 // sendto - data8 0 // recv // 1200 - data8 0 // recvfrom - data8 0 // shutdown - data8 0 // 
setsockopt - data8 0 // getsockopt - data8 0 // sendmsg // 1205 - data8 0 // recvmsg - data8 0 // pivot_root - data8 0 // mincore - data8 0 // madvise - data8 0 // newstat // 1210 - data8 0 // newlstat - data8 0 // newfstat - data8 0 // clone2 - data8 0 // getdents64 - data8 0 // getunwind // 1215 - data8 0 // readahead - data8 0 // setxattr - data8 0 // lsetxattr - data8 0 // fsetxattr - data8 0 // getxattr // 1220 - data8 0 // lgetxattr - data8 0 // fgetxattr - data8 0 // listxattr - data8 0 // llistxattr - data8 0 // flistxattr // 1225 - data8 0 // removexattr - data8 0 // lremovexattr - data8 0 // fremovexattr - data8 0 // tkill - data8 0 // futex // 1230 - data8 0 // sched_setaffinity - data8 0 // sched_getaffinity - data8 fsys_set_tid_address // set_tid_address - data8 0 // fadvise64_64 - data8 0 // tgkill // 1235 - data8 0 // exit_group - data8 0 // lookup_dcookie - data8 0 // io_setup - data8 0 // io_destroy - data8 0 // io_getevents // 1240 - data8 0 // io_submit - data8 0 // io_cancel - data8 0 // epoll_create - data8 0 // epoll_ctl - data8 0 // epoll_wait // 1245 - data8 0 // restart_syscall - data8 0 // semtimedop - data8 0 // timer_create - data8 0 // timer_settime - data8 0 // timer_gettime // 1250 - data8 0 // timer_getoverrun - data8 0 // timer_delete - data8 0 // clock_settime - data8 fsys_clock_gettime // clock_gettime - data8 0 // clock_getres // 1255 - data8 0 // clock_nanosleep - data8 0 // fstatfs64 - data8 0 // statfs64 - data8 0 // mbind - data8 0 // get_mempolicy // 1260 - data8 0 // set_mempolicy - data8 0 // mq_open - data8 0 // mq_unlink - data8 0 // mq_timedsend - data8 0 // mq_timedreceive // 1265 - data8 0 // mq_notify - data8 0 // mq_getsetattr - data8 0 // kexec_load - data8 0 // vserver - data8 0 // waitid // 1270 - data8 0 // add_key - data8 0 // request_key - data8 0 // keyctl - data8 0 // ioprio_set - data8 0 // ioprio_get // 1275 - data8 0 // move_pages - data8 0 // inotify_init - data8 0 // inotify_add_watch - data8 0 // 
inotify_rm_watch - data8 0 // migrate_pages // 1280 - data8 0 // openat - data8 0 // mkdirat - data8 0 // mknodat - data8 0 // fchownat - data8 0 // futimesat // 1285 - data8 0 // newfstatat - data8 0 // unlinkat - data8 0 // renameat - data8 0 // linkat - data8 0 // symlinkat // 1290 - data8 0 // readlinkat - data8 0 // fchmodat - data8 0 // faccessat - data8 0 - data8 0 // 1295 - data8 0 // unshare - data8 0 // splice - data8 0 // set_robust_list - data8 0 // get_robust_list - data8 0 // sync_file_range // 1300 - data8 0 // tee - data8 0 // vmsplice - data8 0 - data8 fsys_getcpu // getcpu // 1304 - - // fill in zeros for the remaining entries - .zero: - .space fsyscall_table + 8*NR_syscalls - .zero, 0 diff --git a/arch/ia64/kernel/fsyscall_gtod_data.h b/arch/ia64/kernel/fsyscall_gtod_data.h deleted file mode 100644 index cc2861445965..000000000000 --- a/arch/ia64/kernel/fsyscall_gtod_data.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. - * Contributed by Peter Keilty - * - * fsyscall gettimeofday data - */ - -/* like timespec, but includes "shifted nanoseconds" */ -struct time_sn_spec { - u64 sec; - u64 snsec; -}; - -struct fsyscall_gtod_data_t { - seqcount_t seq; - struct time_sn_spec wall_time; - struct time_sn_spec monotonic_time; - u64 clk_mask; - u32 clk_mult; - u32 clk_shift; - void *clk_fsys_mmio; - u64 clk_cycle_last; -} ____cacheline_aligned; - -struct itc_jitter_data_t { - int itc_jitter; - u64 itc_lastcycle; -} ____cacheline_aligned; - diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c deleted file mode 100644 index d6360fd404ab..000000000000 --- a/arch/ia64/kernel/ftrace.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Dynamic function tracing support. - * - * Copyright (C) 2008 Shaohua Li - * - * For licencing details, see COPYING. - * - * Defines low-level handling of mcount calls when the kernel - * is compiled with the -pg flag. 
When using dynamic ftrace, the - * mcount call-sites get patched lazily with NOP till they are - * enabled. All code mutation routines here take effect atomically. - */ - -#include -#include - -#include -#include - -/* In IA64, each function will be added below two bundles with -pg option */ -static unsigned char __attribute__((aligned(8))) -ftrace_orig_code[MCOUNT_INSN_SIZE] = { - 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */ - 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */ - 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */ - 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */ - 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */ - 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */ -}; - -struct ftrace_orig_insn { - u64 dummy1, dummy2, dummy3; - u64 dummy4:64-41+13; - u64 imm20:20; - u64 dummy5:3; - u64 sign:1; - u64 dummy6:4; -}; - -/* mcount stub will be converted below for nop */ -static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = { - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ - 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ - 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ - 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */ - 0x00, 0x00, 0x04, 0x00 -}; - -static unsigned char *ftrace_nop_replace(void) -{ - return ftrace_nop_code; -} - -/* - * mcount stub will be converted below for call - * Note: Just the last instruction is changed against nop - * */ -static unsigned char __attribute__((aligned(8))) -ftrace_call_code[MCOUNT_INSN_SIZE] = { - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ - 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ - 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ - 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ - 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/ - 0xf8, 0xff, 0xff, 0xc8 -}; - -struct ftrace_call_insn { - u64 dummy1, dummy2; - u64 dummy3:48; - u64 imm39_l:16; - u64 imm39_h:23; - u64 
dummy4:13; - u64 imm20:20; - u64 dummy5:3; - u64 i:1; - u64 dummy6:4; -}; - -static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) -{ - struct ftrace_call_insn *code = (void *)ftrace_call_code; - unsigned long offset = addr - (ip + 0x10); - - code->imm39_l = offset >> 24; - code->imm39_h = offset >> 40; - code->imm20 = offset >> 4; - code->i = offset >> 63; - return ftrace_call_code; -} - -static int -ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code, int do_check) -{ - unsigned char replaced[MCOUNT_INSN_SIZE]; - - /* - * Note: - * We are paranoid about modifying text, as if a bug was to happen, it - * could cause us to read or write to someplace that could cause harm. - * Carefully read and modify the code with probe_kernel_*(), and make - * sure what we read is what we expected it to be before modifying it. - */ - - if (!do_check) - goto skip_check; - - /* read the text we want to modify */ - if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - - /* Make sure it is what we expect it to be */ - if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) - return -EINVAL; - -skip_check: - /* replace the text with the new text */ - if (copy_to_kernel_nofault(((void *)ip), new_code, MCOUNT_INSN_SIZE)) - return -EPERM; - flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); - - return 0; -} - -static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; - unsigned long ip = rec->ip; - - if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) - return -EFAULT; - if (rec->flags & FTRACE_FL_CONVERTED) { - struct ftrace_call_insn *call_insn, *tmp_call; - - call_insn = (void *)ftrace_call_code; - tmp_call = (void *)replaced; - call_insn->imm39_l = tmp_call->imm39_l; - call_insn->imm39_h = tmp_call->imm39_h; - call_insn->imm20 = tmp_call->imm20; - call_insn->i = tmp_call->i; - if 
(memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0) - return -EINVAL; - return 0; - } else { - struct ftrace_orig_insn *call_insn, *tmp_call; - - call_insn = (void *)ftrace_orig_code; - tmp_call = (void *)replaced; - call_insn->sign = tmp_call->sign; - call_insn->imm20 = tmp_call->imm20; - if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0) - return -EINVAL; - return 0; - } -} - -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - int ret; - char *new; - - ret = ftrace_make_nop_check(rec, addr); - if (ret) - return ret; - new = ftrace_nop_replace(); - return ftrace_modify_code(rec->ip, NULL, new, 0); -} - -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long ip = rec->ip; - unsigned char *old, *new; - - old= ftrace_nop_replace(); - new = ftrace_call_replace(ip, addr); - return ftrace_modify_code(ip, old, new, 1); -} - -/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */ -int ftrace_update_ftrace_func(ftrace_func_t func) -{ - unsigned long ip; - unsigned long addr = ((struct fnptr *)ftrace_call)->ip; - - if (func == ftrace_stub) - return 0; - ip = ((struct fnptr *)func)->ip; - - ia64_patch_imm64(addr + 2, ip); - - flush_icache_range(addr, addr + 16); - return 0; -} diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S deleted file mode 100644 index b3ef1c72e132..000000000000 --- a/arch/ia64/kernel/gate-data.S +++ /dev/null @@ -1,3 +0,0 @@ - .section .data..gate, "aw" - - .incbin "arch/ia64/kernel/gate.so" diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S deleted file mode 100644 index 9f235cd551ab..000000000000 --- a/arch/ia64/kernel/gate.S +++ /dev/null @@ -1,380 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file contains the code that gets mapped at the upper end of each task's text - * region. For now, it contains the signal trampoline code only. 
- * - * Copyright (C) 1999-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, - * complications with the linker (which likes to create PLT stubs for branches - * to targets outside the shared object) and to avoid multi-phase kernel builds, we - * simply create minimalistic "patch lists" in special ELF sections. - */ - .section ".data..patch.fsyscall_table", "a" - .previous -#define LOAD_FSYSCALL_TABLE(reg) \ -[1:] movl reg=0; \ - .xdata4 ".data..patch.fsyscall_table", 1b-. - - .section ".data..patch.brl_fsys_bubble_down", "a" - .previous -#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ -[1:](pr)brl.cond.sptk 0; \ - ;; \ - .xdata4 ".data..patch.brl_fsys_bubble_down", 1b-. - -GLOBAL_ENTRY(__kernel_syscall_via_break) - .prologue - .altrp b6 - .body - /* - * Note: for (fast) syscall restart to work, the break instruction must be - * the first one in the bundle addressed by syscall_via_break. 
- */ -{ .mib - break 0x100000 - nop.i 0 - br.ret.sptk.many b6 -} -END(__kernel_syscall_via_break) - -# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) -# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) -# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) -# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET) -# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET) - -# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET -# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET -# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET -# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET -# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET -# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET -# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET -# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET -# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET -# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET -# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET -# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET -# define base0 r2 -# define base1 r3 - /* - * When we get here, the memory stack looks like this: - * - * +===============================+ - * | | - * // struct sigframe // - * | | - * +-------------------------------+ <-- sp+16 - * | 16 byte of scratch | - * | space | - * +-------------------------------+ <-- sp - * - * The register stack looks _exactly_ the way it looked at the time the signal - * occurred. In other words, we're treading on a potential mine-field: each - * incoming general register may be a NaT value (including sp, in which case the - * process ends up dying with a SIGSEGV). - * - * The first thing need to do is a cover to get the registers onto the backing - * store. Once that is done, we invoke the signal handler which may modify some - * of the machine state. After returning from the signal handler, we return - * control to the previous context by executing a sigreturn system call. A signal - * handler may call the rt_sigreturn() function to directly return to a given - * sigcontext. 
However, the user-level sigreturn() needs to do much more than - * calling the rt_sigreturn() system call as it needs to unwind the stack to - * restore preserved registers that may have been saved on the signal handler's - * call stack. - */ - -#define SIGTRAMP_SAVES \ - .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \ - .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \ - .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \ - .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \ - .savesp pr, PR_OFF+SIGCONTEXT_OFF; \ - .savesp rp, RP_OFF+SIGCONTEXT_OFF; \ - .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \ - .vframesp SP_OFF+SIGCONTEXT_OFF - -GLOBAL_ENTRY(__kernel_sigtramp) - // describe the state that is active when we get here: - .prologue - SIGTRAMP_SAVES - .body - - .label_state 1 - - adds base0=SIGHANDLER_OFF,sp - adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp - br.call.sptk.many rp=1f -1: - ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel - ld8 r15=[base1] // get address of new RBS base (or NULL) - cover // push args in interrupted frame onto backing store - ;; - cmp.ne p1,p0=r15,r0 // do we need to switch rbs? 
(note: pr is saved by kernel) - mov.m r9=ar.bsp // fetch ar.bsp - .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF -(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20) -back_from_setup_rbs: - alloc r8=ar.pfs,0,0,3,0 - ld8 out0=[base0],16 // load arg0 (signum) - adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1 - ;; - ld8 out1=[base1] // load arg1 (siginfop) - ld8 r10=[r17],8 // get signal handler entry point - ;; - ld8 out2=[base0] // load arg2 (sigcontextp) - ld8 gp=[r17] // get signal handler's global pointer - adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp - ;; - .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF - st8 [base0]=r9 // save sc_ar_bsp - adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp - adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp - ;; - stf.spill [base0]=f6,32 - stf.spill [base1]=f7,32 - ;; - stf.spill [base0]=f8,32 - stf.spill [base1]=f9,32 - mov b6=r10 - ;; - stf.spill [base0]=f10,32 - stf.spill [base1]=f11,32 - ;; - stf.spill [base0]=f12,32 - stf.spill [base1]=f13,32 - ;; - stf.spill [base0]=f14,32 - stf.spill [base1]=f15,32 - br.call.sptk.many rp=b6 // call the signal handler -.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp - ;; - ld8 r15=[base0] // fetch sc_ar_bsp - mov r14=ar.bsp - ;; - cmp.ne p1,p0=r14,r15 // do we need to restore the rbs? 
-(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7) - ;; -back_from_restore_rbs: - adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp - adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp - ;; - ldf.fill f6=[base0],32 - ldf.fill f7=[base1],32 - ;; - ldf.fill f8=[base0],32 - ldf.fill f9=[base1],32 - ;; - ldf.fill f10=[base0],32 - ldf.fill f11=[base1],32 - ;; - ldf.fill f12=[base0],32 - ldf.fill f13=[base1],32 - ;; - ldf.fill f14=[base0],32 - ldf.fill f15=[base1],32 - mov r15=__NR_rt_sigreturn - .restore sp // pop .prologue - break __BREAK_SYSCALL - - .prologue - SIGTRAMP_SAVES -setup_rbs: - mov ar.rsc=0 // put RSE into enforced lazy mode - ;; - .save ar.rnat, r19 - mov r19=ar.rnat // save RNaT before switching backing store area - adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp - - mov r18=ar.bspstore - mov ar.bspstore=r15 // switch over to new register backing store area - ;; - - .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF - st8 [r14]=r19 // save sc_ar_rnat - .body - mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16 - adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp - ;; - invala - sub r15=r16,r15 - extr.u r20=r18,3,6 - ;; - mov ar.rsc=0xf // set RSE into eager mode, pl 3 - cmp.eq p8,p0=63,r20 - shl r15=r15,16 - ;; - st8 [r14]=r15 // save sc_loadrs -(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now - .restore sp // pop .prologue - br.cond.sptk back_from_setup_rbs - - .prologue - SIGTRAMP_SAVES - .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF - .body -restore_rbs: - // On input: - // r14 = bsp1 (bsp at the time of return from signal handler) - // r15 = bsp0 (bsp at the time the signal occurred) - // - // Here, we need to calculate bspstore0, the value that ar.bspstore needs - // to be set to, based on bsp0 and the size of the dirty partition on - // the alternate stack (sc_loadrs >> 16). 
This can be done with the - // following algorithm: - // - // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1)); - // - // This is what the code below does. - // - alloc r2=ar.pfs,0,0,0,0 // alloc null frame - adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp - adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp - ;; - ld8 r17=[r16] - ld8 r16=[r18] // get new rnat - extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0) - ;; - mov ar.rsc=r17 // put RSE into enforced lazy mode - shr.u r17=r17,16 - ;; - sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16) - shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19) - ;; - loadrs // restore dirty partition - extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1) - ;; - add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19) - ;; - shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40 - ;; - sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1) - movl r17=0x8208208208208209 - ;; - add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1) - setf.sig f7=r17 - cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)? 
- ;; -(p7) adds r18=-62,r18 // delta -= 62 - ;; - setf.sig f6=r18 - ;; - xmpy.h f6=f6,f7 - ;; - getf.sig r17=f6 - ;; - add r17=r17,r18 - shr r18=r18,63 - ;; - shr r17=r17,5 - ;; - sub r17=r17,r18 // r17 = delta/63 - ;; - add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1) - ;; - shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1)) - ;; - mov ar.bspstore=r15 // switch back to old register backing store area - ;; - mov ar.rnat=r16 // restore RNaT - mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) - // invala not necessary as that will happen when returning to user-mode - br.cond.sptk back_from_restore_rbs -END(__kernel_sigtramp) - -/* - * On entry: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * b6 = return address - * On exit: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * all other "scratch" registers: undefined - * all "preserved" registers: same as on entry - */ - -GLOBAL_ENTRY(__kernel_syscall_via_epc) - .prologue - .altrp b6 - .body -{ - /* - * Note: the kernel cannot assume that the first two instructions in this - * bundle get executed. The remaining code must be safe even if - * they do not get executed. - */ - adds r17=-1024,r15 // A - mov r10=0 // A default to successful syscall execution - epc // B causes split-issue -} - ;; - RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d) - LOAD_FSYSCALL_TABLE(r14) // X - ;; - mov r16=IA64_KR(CURRENT) // M2 (12 cyc) - shladd r18=r17,3,r14 // A - mov r19=NR_syscalls-1 // A - ;; - lfetch [r18] // M0|1 - MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc) - // If r17 is a NaT, p6 will be zero - cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? - ;; - mov r21=ar.fpsr // M2 (12 cyc) - tnat.nz p10,p9=r15 // I0 - mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) 
- ;; - srlz.d // M0 (forces split-issue) ensure PSR.BE==0 -(p6) ld8 r18=[r18] // M0|1 - nop.i 0 - ;; - nop.m 0 -(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) - nop.i 0 - ;; - SSM_PSR_I(p8, p14, r25) -(p6) mov b7=r18 // I0 -(p8) br.dptk.many b7 // B - - mov r27=ar.rsc // M2 (12 cyc) -/* - * brl.cond doesn't work as intended because the linker would convert this branch - * into a branch to a PLT. Perhaps there will be a way to avoid this with some - * future version of the linker. In the meantime, we just use an indirect branch - * instead. - */ -#ifdef CONFIG_ITANIUM -(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry - ;; -(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down - ;; -(p6) mov b7=r14 -(p6) br.sptk.many b7 -#else - BRL_COND_FSYS_BUBBLE_DOWN(p6) -#endif - SSM_PSR_I(p0, p14, r10) - mov r10=-1 -(p10) mov r8=EINVAL -(p9) mov r8=ENOSYS - FSYS_RETURN - -END(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S deleted file mode 100644 index 461c7e69d465..000000000000 --- a/arch/ia64/kernel/gate.lds.S +++ /dev/null @@ -1,108 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Linker script for gate DSO. The gate pages are an ELF shared object - * prelinked to its virtual address, with only one read-only segment and - * one execute-only segment (both fit in one page). This script controls - * its layout. - */ - -#include - -SECTIONS -{ - . = GATE_ADDR + SIZEOF_HEADERS; - - .hash : { *(.hash) } :readable - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note*) } :readable :note - - .dynamic : { *(.dynamic) } :readable :dynamic - - /* - * This linker script is used both with -r and with -shared. For - * the layouts to match, we need to skip more than enough space for - * the dynamic symbol table et al. 
If this amount is insufficient, - * ld -shared will barf. Just increase it here. - */ - . = GATE_ADDR + 0x600; - - .data..patch : { - __start_gate_mckinley_e9_patchlist = .; - *(.data..patch.mckinley_e9) - __end_gate_mckinley_e9_patchlist = .; - - __start_gate_vtop_patchlist = .; - *(.data..patch.vtop) - __end_gate_vtop_patchlist = .; - - __start_gate_fsyscall_patchlist = .; - *(.data..patch.fsyscall_table) - __end_gate_fsyscall_patchlist = .; - - __start_gate_brl_fsys_bubble_down_patchlist = .; - *(.data..patch.brl_fsys_bubble_down) - __end_gate_brl_fsys_bubble_down_patchlist = .; - } :readable - - .IA_64.unwind_info : { *(.IA_64.unwind_info*) } - .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind -#ifdef HAVE_BUGGY_SEGREL - .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable -#else - . = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1)); - .text : { *(.text) *(.text.*) } :epc -#endif - - /DISCARD/ : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) - *(__ex_table) - *(__mca_table) - } -} - -/* - * ld does not recognize this name token; use the constant. - */ -#define PT_IA_64_UNWIND 0x70000001 - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */ -#ifndef HAVE_BUGGY_SEGREL - epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */ -#endif - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - unwind PT_IA_64_UNWIND; -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - LINUX_2.5 { - global: - __kernel_syscall_via_break; - __kernel_syscall_via_epc; - __kernel_sigtramp; - - local: *; - }; -} - -/* The ELF entry point can be used to set the AT_SYSINFO value. 
*/ -ENTRY(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S deleted file mode 100644 index 85c8a57da402..000000000000 --- a/arch/ia64/kernel/head.S +++ /dev/null @@ -1,1167 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Here is where the ball gets rolling as far as the kernel is concerned. - * When control is transferred to _start, the bootload has already - * loaded us to the correct address. All that's left to do here is - * to set up the kernel's global pointer and jump to the kernel - * entry point. - * - * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999 Intel Corp. - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - * Copyright (C) 2002 Fenghua Yu - * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. - * Copyright (C) 2004 Ashok Raj - * Support for CPU Hotplug - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_HOTPLUG_CPU -#define SAL_PSR_BITS_TO_SET \ - (IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL) - -#define SAVE_FROM_REG(src, ptr, dest) \ - mov dest=src;; \ - st8 [ptr]=dest,0x08 - -#define RESTORE_REG(reg, ptr, _tmp) \ - ld8 _tmp=[ptr],0x08;; \ - mov reg=_tmp - -#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\ - mov ar.lc=IA64_NUM_DBG_REGS-1;; \ - mov _idx=0;; \ -1: \ - SAVE_FROM_REG(_breg[_idx], ptr, _dest);; \ - add _idx=1,_idx;; \ - br.cloop.sptk.many 1b - -#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\ - mov ar.lc=IA64_NUM_DBG_REGS-1;; \ - mov _idx=0;; \ -_lbl: RESTORE_REG(_breg[_idx], ptr, _tmp);; \ - add _idx=1, _idx;; \ - br.cloop.sptk.many _lbl - -#define SAVE_ONE_RR(num, _reg, _tmp) \ - movl _tmp=(num<<61);; \ - mov _reg=rr[_tmp] - -#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, 
_r6, _r7) \ - SAVE_ONE_RR(0,_r0, _tmp);; \ - SAVE_ONE_RR(1,_r1, _tmp);; \ - SAVE_ONE_RR(2,_r2, _tmp);; \ - SAVE_ONE_RR(3,_r3, _tmp);; \ - SAVE_ONE_RR(4,_r4, _tmp);; \ - SAVE_ONE_RR(5,_r5, _tmp);; \ - SAVE_ONE_RR(6,_r6, _tmp);; \ - SAVE_ONE_RR(7,_r7, _tmp);; - -#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ - st8 [ptr]=_r0, 8;; \ - st8 [ptr]=_r1, 8;; \ - st8 [ptr]=_r2, 8;; \ - st8 [ptr]=_r3, 8;; \ - st8 [ptr]=_r4, 8;; \ - st8 [ptr]=_r5, 8;; \ - st8 [ptr]=_r6, 8;; \ - st8 [ptr]=_r7, 8;; - -#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \ - mov ar.lc=0x08-1;; \ - movl _idx1=0x00;; \ -RestRR: \ - dep.z _idx2=_idx1,61,3;; \ - ld8 _tmp=[ptr],8;; \ - mov rr[_idx2]=_tmp;; \ - srlz.d;; \ - add _idx1=1,_idx1;; \ - br.cloop.sptk.few RestRR - -#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \ - movl reg1=sal_state_for_booting_cpu;; \ - ld8 reg2=[reg1];; - -/* - * Adjust region registers saved before starting to save - * break regs and rest of the states that need to be preserved. 
- */ -#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred) \ - SAVE_FROM_REG(b0,_reg1,_reg2);; \ - SAVE_FROM_REG(b1,_reg1,_reg2);; \ - SAVE_FROM_REG(b2,_reg1,_reg2);; \ - SAVE_FROM_REG(b3,_reg1,_reg2);; \ - SAVE_FROM_REG(b4,_reg1,_reg2);; \ - SAVE_FROM_REG(b5,_reg1,_reg2);; \ - st8 [_reg1]=r1,0x08;; \ - st8 [_reg1]=r12,0x08;; \ - st8 [_reg1]=r13,0x08;; \ - SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.pfs,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.rnat,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.unat,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.dcr,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.iva,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.pta,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.itv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.pmv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);; \ - st8 [_reg1]=r4,0x08;; \ - st8 [_reg1]=r5,0x08;; \ - st8 [_reg1]=r6,0x08;; \ - st8 [_reg1]=r7,0x08;; \ - st8 [_reg1]=_pred,0x08;; \ - SAVE_FROM_REG(ar.lc, _reg1, _reg2);; \ - stf.spill.nta [_reg1]=f2,16;; \ - stf.spill.nta [_reg1]=f3,16;; \ - stf.spill.nta [_reg1]=f4,16;; \ - stf.spill.nta [_reg1]=f5,16;; \ - stf.spill.nta [_reg1]=f16,16;; \ - stf.spill.nta [_reg1]=f17,16;; \ - stf.spill.nta [_reg1]=f18,16;; \ - stf.spill.nta [_reg1]=f19,16;; \ - stf.spill.nta [_reg1]=f20,16;; \ - stf.spill.nta [_reg1]=f21,16;; \ - stf.spill.nta [_reg1]=f22,16;; \ - stf.spill.nta [_reg1]=f23,16;; \ - stf.spill.nta [_reg1]=f24,16;; \ - stf.spill.nta [_reg1]=f25,16;; \ - stf.spill.nta [_reg1]=f26,16;; \ - stf.spill.nta [_reg1]=f27,16;; \ - stf.spill.nta [_reg1]=f28,16;; \ - stf.spill.nta [_reg1]=f29,16;; \ - stf.spill.nta [_reg1]=f30,16;; \ - stf.spill.nta [_reg1]=f31,16;; - -#else -#define SET_AREA_FOR_BOOTING_CPU(a1, a2) -#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3) -#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) -#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, 
_r5, _r6, _r7) -#endif - -#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \ - movl _tmp1=(num << 61);; \ - mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ - mov rr[_tmp1]=_tmp2 - - __PAGE_ALIGNED_DATA - - .global empty_zero_page -EXPORT_SYMBOL_GPL(empty_zero_page) -empty_zero_page: - .skip PAGE_SIZE - - .global swapper_pg_dir -swapper_pg_dir: - .skip PAGE_SIZE - - .rodata -halt_msg: - stringz "Halting kernel\n" - - __REF - - .global start_ap - - /* - * Start the kernel. When the bootloader passes control to _start(), r28 - * points to the address of the boot parameter area. Execution reaches - * here in physical mode. - */ -GLOBAL_ENTRY(_start) -start_ap: - .prologue - .save rp, r0 // terminate unwind chain with a NULL rp - .body - - rsm psr.i | psr.ic - ;; - srlz.i - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - /* - * Save the region registers, predicate before they get clobbered - */ - SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15); - mov r25=pr;; - - /* - * Initialize kernel region registers: - * rr[0]: VHPT enabled, page size = PAGE_SHIFT - * rr[1]: VHPT enabled, page size = PAGE_SHIFT - * rr[2]: VHPT enabled, page size = PAGE_SHIFT - * rr[3]: VHPT enabled, page size = PAGE_SHIFT - * rr[4]: VHPT enabled, page size = PAGE_SHIFT - * rr[5]: VHPT enabled, page size = PAGE_SHIFT - * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT - * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT - * We initialize all of them to prevent inadvertently assuming - * something about the state of address translation early in boot. 
- */ - SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);; - SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);; - /* - * Now pin mappings into the TLB for kernel text and data - */ - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - movl r17=KERNEL_START - ;; - mov cr.itir=r18 - mov cr.ifa=r17 - mov r16=IA64_TR_KERNEL - mov r3=ip - movl r18=PAGE_KERNEL - ;; - dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT - ;; - or r18=r2,r18 - ;; - srlz.i - ;; - itr.i itr[r16]=r18 - ;; - itr.d dtr[r16]=r18 - ;; - srlz.i - - /* - * Switch into virtual mode: - */ - movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ - |IA64_PSR_DI) - ;; - mov cr.ipsr=r16 - movl r17=1f - ;; - mov cr.iip=r17 - mov cr.ifs=r0 - ;; - rfi - ;; -1: // now we are in virtual mode - - SET_AREA_FOR_BOOTING_CPU(r2, r16); - - STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15); - SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25) - ;; - - // set IVT entry point---can't access I/O ports without it - movl r3=ia64_ivt - ;; - mov cr.iva=r3 - movl r2=FPSR_DEFAULT - ;; - srlz.i - movl gp=__gp - - mov ar.fpsr=r2 - ;; - -#define isAP p2 // are we an Application Processor? -#define isBP p3 // are we the Bootstrap Processor? - -#ifdef CONFIG_SMP - /* - * Find the init_task for the currently booting CPU. At poweron, and in - * UP mode, task_for_booting_cpu is NULL. 
- */ - movl r3=task_for_booting_cpu - ;; - ld8 r3=[r3] - movl r2=init_task - ;; - cmp.eq isBP,isAP=r3,r0 - ;; -(isAP) mov r2=r3 -#else - movl r2=init_task - cmp.eq isBP,isAP=r0,r0 -#endif - ;; - tpa r3=r2 // r3 == phys addr of task struct - mov r16=-1 -(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it - - // load mapping for stack (virtaddr in r2, physaddr in r3) - rsm psr.ic - movl r17=PAGE_KERNEL - ;; - srlz.d - dep r18=0,r3,0,12 - ;; - or r18=r17,r18 - dep r2=-1,r3,61,3 // IMVA of task - ;; - mov r17=rr[r2] - shr.u r16=r3,IA64_GRANULE_SHIFT - ;; - dep r17=0,r17,8,24 - ;; - mov cr.itir=r17 - mov cr.ifa=r2 - - mov r19=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r19]=r18 - ;; - ssm psr.ic - srlz.d - ;; - -.load_current: - // load the "current" pointer (r13) and ar.k6 with the current task - mov IA64_KR(CURRENT)=r2 // virtual address - mov IA64_KR(CURRENT_STACK)=r16 - mov r13=r2 - /* - * Reserve space at the top of the stack for "struct pt_regs". Kernel - * threads don't store interesting values in that structure, but the space - * still needs to be there because time-critical stuff such as the context - * switching can be implemented more efficiently (for example, __switch_to() - * always sets the psr.dfh bit of the task it is switching to). 
- */ - - addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 - addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE - mov ar.rsc=0 // place RSE in enforced lazy mode - ;; - loadrs // clear the dirty partition - movl r19=__phys_per_cpu_start - mov r18=PERCPU_PAGE_SIZE - ;; -#ifndef CONFIG_SMP - add r19=r19,r18 - ;; -#else -(isAP) br.few 2f - movl r20=__cpu0_per_cpu - ;; - shr.u r18=r18,3 -1: - ld8 r21=[r19],8;; - st8[r20]=r21,8 - adds r18=-1,r18;; - cmp4.lt p7,p6=0,r18 -(p7) br.cond.dptk.few 1b - mov r19=r20 - ;; -2: -#endif - tpa r19=r19 - ;; - .pred.rel.mutex isBP,isAP -(isBP) mov IA64_KR(PER_CPU_DATA)=r19 // per-CPU base for cpu0 -(isAP) mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base - ;; - mov ar.bspstore=r2 // establish the new RSE stack - ;; - mov ar.rsc=0x3 // place RSE in eager mode - -(isBP) dep r28=-1,r28,61,3 // make address virtual -(isBP) movl r2=ia64_boot_param - ;; -(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader - -#ifdef CONFIG_SMP -(isAP) br.call.sptk.many rp=start_secondary -.ret0: -(isAP) br.cond.sptk self -#endif - - // This is executed by the bootstrap processor (bsp) only: - - br.call.sptk.many rp=start_kernel -.ret2: addl r3=@ltoff(halt_msg),gp - ;; - alloc r2=ar.pfs,8,0,2,0 - ;; - ld8 out0=[r3] - br.call.sptk.many b0=console_print - -self: hint @pause - br.sptk.many self // endless loop -END(_start) - - .text - -GLOBAL_ENTRY(ia64_save_debug_regs) - alloc r16=ar.pfs,1,0,0,0 - mov r20=ar.lc // preserve ar.lc - mov ar.lc=IA64_NUM_DBG_REGS-1 - mov r18=0 - add r19=IA64_NUM_DBG_REGS*8,in0 - ;; -1: mov r16=dbr[r18] -#ifdef CONFIG_ITANIUM - ;; - srlz.d -#endif - mov r17=ibr[r18] - add r18=1,r18 - ;; - st8.nta [in0]=r16,8 - st8.nta [r19]=r17,8 - br.cloop.sptk.many 1b - ;; - mov ar.lc=r20 // restore ar.lc - br.ret.sptk.many rp -END(ia64_save_debug_regs) - -GLOBAL_ENTRY(ia64_load_debug_regs) - alloc r16=ar.pfs,1,0,0,0 - lfetch.nta [in0] - mov r20=ar.lc // preserve ar.lc - add r19=IA64_NUM_DBG_REGS*8,in0 - 
mov ar.lc=IA64_NUM_DBG_REGS-1 - mov r18=-1 - ;; -1: ld8.nta r16=[in0],8 - ld8.nta r17=[r19],8 - add r18=1,r18 - ;; - mov dbr[r18]=r16 -#ifdef CONFIG_ITANIUM - ;; - srlz.d // Errata 132 (NoFix status) -#endif - mov ibr[r18]=r17 - br.cloop.sptk.many 1b - ;; - mov ar.lc=r20 // restore ar.lc - br.ret.sptk.many rp -END(ia64_load_debug_regs) - -GLOBAL_ENTRY(__ia64_save_fpu) - alloc r2=ar.pfs,1,4,0,0 - adds loc0=96*16-16,in0 - adds loc1=96*16-16-128,in0 - ;; - stf.spill.nta [loc0]=f127,-256 - stf.spill.nta [loc1]=f119,-256 - ;; - stf.spill.nta [loc0]=f111,-256 - stf.spill.nta [loc1]=f103,-256 - ;; - stf.spill.nta [loc0]=f95,-256 - stf.spill.nta [loc1]=f87,-256 - ;; - stf.spill.nta [loc0]=f79,-256 - stf.spill.nta [loc1]=f71,-256 - ;; - stf.spill.nta [loc0]=f63,-256 - stf.spill.nta [loc1]=f55,-256 - adds loc2=96*16-32,in0 - ;; - stf.spill.nta [loc0]=f47,-256 - stf.spill.nta [loc1]=f39,-256 - adds loc3=96*16-32-128,in0 - ;; - stf.spill.nta [loc2]=f126,-256 - stf.spill.nta [loc3]=f118,-256 - ;; - stf.spill.nta [loc2]=f110,-256 - stf.spill.nta [loc3]=f102,-256 - ;; - stf.spill.nta [loc2]=f94,-256 - stf.spill.nta [loc3]=f86,-256 - ;; - stf.spill.nta [loc2]=f78,-256 - stf.spill.nta [loc3]=f70,-256 - ;; - stf.spill.nta [loc2]=f62,-256 - stf.spill.nta [loc3]=f54,-256 - adds loc0=96*16-48,in0 - ;; - stf.spill.nta [loc2]=f46,-256 - stf.spill.nta [loc3]=f38,-256 - adds loc1=96*16-48-128,in0 - ;; - stf.spill.nta [loc0]=f125,-256 - stf.spill.nta [loc1]=f117,-256 - ;; - stf.spill.nta [loc0]=f109,-256 - stf.spill.nta [loc1]=f101,-256 - ;; - stf.spill.nta [loc0]=f93,-256 - stf.spill.nta [loc1]=f85,-256 - ;; - stf.spill.nta [loc0]=f77,-256 - stf.spill.nta [loc1]=f69,-256 - ;; - stf.spill.nta [loc0]=f61,-256 - stf.spill.nta [loc1]=f53,-256 - adds loc2=96*16-64,in0 - ;; - stf.spill.nta [loc0]=f45,-256 - stf.spill.nta [loc1]=f37,-256 - adds loc3=96*16-64-128,in0 - ;; - stf.spill.nta [loc2]=f124,-256 - stf.spill.nta [loc3]=f116,-256 - ;; - stf.spill.nta [loc2]=f108,-256 - stf.spill.nta 
[loc3]=f100,-256 - ;; - stf.spill.nta [loc2]=f92,-256 - stf.spill.nta [loc3]=f84,-256 - ;; - stf.spill.nta [loc2]=f76,-256 - stf.spill.nta [loc3]=f68,-256 - ;; - stf.spill.nta [loc2]=f60,-256 - stf.spill.nta [loc3]=f52,-256 - adds loc0=96*16-80,in0 - ;; - stf.spill.nta [loc2]=f44,-256 - stf.spill.nta [loc3]=f36,-256 - adds loc1=96*16-80-128,in0 - ;; - stf.spill.nta [loc0]=f123,-256 - stf.spill.nta [loc1]=f115,-256 - ;; - stf.spill.nta [loc0]=f107,-256 - stf.spill.nta [loc1]=f99,-256 - ;; - stf.spill.nta [loc0]=f91,-256 - stf.spill.nta [loc1]=f83,-256 - ;; - stf.spill.nta [loc0]=f75,-256 - stf.spill.nta [loc1]=f67,-256 - ;; - stf.spill.nta [loc0]=f59,-256 - stf.spill.nta [loc1]=f51,-256 - adds loc2=96*16-96,in0 - ;; - stf.spill.nta [loc0]=f43,-256 - stf.spill.nta [loc1]=f35,-256 - adds loc3=96*16-96-128,in0 - ;; - stf.spill.nta [loc2]=f122,-256 - stf.spill.nta [loc3]=f114,-256 - ;; - stf.spill.nta [loc2]=f106,-256 - stf.spill.nta [loc3]=f98,-256 - ;; - stf.spill.nta [loc2]=f90,-256 - stf.spill.nta [loc3]=f82,-256 - ;; - stf.spill.nta [loc2]=f74,-256 - stf.spill.nta [loc3]=f66,-256 - ;; - stf.spill.nta [loc2]=f58,-256 - stf.spill.nta [loc3]=f50,-256 - adds loc0=96*16-112,in0 - ;; - stf.spill.nta [loc2]=f42,-256 - stf.spill.nta [loc3]=f34,-256 - adds loc1=96*16-112-128,in0 - ;; - stf.spill.nta [loc0]=f121,-256 - stf.spill.nta [loc1]=f113,-256 - ;; - stf.spill.nta [loc0]=f105,-256 - stf.spill.nta [loc1]=f97,-256 - ;; - stf.spill.nta [loc0]=f89,-256 - stf.spill.nta [loc1]=f81,-256 - ;; - stf.spill.nta [loc0]=f73,-256 - stf.spill.nta [loc1]=f65,-256 - ;; - stf.spill.nta [loc0]=f57,-256 - stf.spill.nta [loc1]=f49,-256 - adds loc2=96*16-128,in0 - ;; - stf.spill.nta [loc0]=f41,-256 - stf.spill.nta [loc1]=f33,-256 - adds loc3=96*16-128-128,in0 - ;; - stf.spill.nta [loc2]=f120,-256 - stf.spill.nta [loc3]=f112,-256 - ;; - stf.spill.nta [loc2]=f104,-256 - stf.spill.nta [loc3]=f96,-256 - ;; - stf.spill.nta [loc2]=f88,-256 - stf.spill.nta [loc3]=f80,-256 - ;; - stf.spill.nta 
[loc2]=f72,-256 - stf.spill.nta [loc3]=f64,-256 - ;; - stf.spill.nta [loc2]=f56,-256 - stf.spill.nta [loc3]=f48,-256 - ;; - stf.spill.nta [loc2]=f40 - stf.spill.nta [loc3]=f32 - br.ret.sptk.many rp -END(__ia64_save_fpu) - -GLOBAL_ENTRY(__ia64_load_fpu) - alloc r2=ar.pfs,1,2,0,0 - adds r3=128,in0 - adds r14=256,in0 - adds r15=384,in0 - mov loc0=512 - mov loc1=-1024+16 - ;; - ldf.fill.nta f32=[in0],loc0 - ldf.fill.nta f40=[ r3],loc0 - ldf.fill.nta f48=[r14],loc0 - ldf.fill.nta f56=[r15],loc0 - ;; - ldf.fill.nta f64=[in0],loc0 - ldf.fill.nta f72=[ r3],loc0 - ldf.fill.nta f80=[r14],loc0 - ldf.fill.nta f88=[r15],loc0 - ;; - ldf.fill.nta f96=[in0],loc1 - ldf.fill.nta f104=[ r3],loc1 - ldf.fill.nta f112=[r14],loc1 - ldf.fill.nta f120=[r15],loc1 - ;; - ldf.fill.nta f33=[in0],loc0 - ldf.fill.nta f41=[ r3],loc0 - ldf.fill.nta f49=[r14],loc0 - ldf.fill.nta f57=[r15],loc0 - ;; - ldf.fill.nta f65=[in0],loc0 - ldf.fill.nta f73=[ r3],loc0 - ldf.fill.nta f81=[r14],loc0 - ldf.fill.nta f89=[r15],loc0 - ;; - ldf.fill.nta f97=[in0],loc1 - ldf.fill.nta f105=[ r3],loc1 - ldf.fill.nta f113=[r14],loc1 - ldf.fill.nta f121=[r15],loc1 - ;; - ldf.fill.nta f34=[in0],loc0 - ldf.fill.nta f42=[ r3],loc0 - ldf.fill.nta f50=[r14],loc0 - ldf.fill.nta f58=[r15],loc0 - ;; - ldf.fill.nta f66=[in0],loc0 - ldf.fill.nta f74=[ r3],loc0 - ldf.fill.nta f82=[r14],loc0 - ldf.fill.nta f90=[r15],loc0 - ;; - ldf.fill.nta f98=[in0],loc1 - ldf.fill.nta f106=[ r3],loc1 - ldf.fill.nta f114=[r14],loc1 - ldf.fill.nta f122=[r15],loc1 - ;; - ldf.fill.nta f35=[in0],loc0 - ldf.fill.nta f43=[ r3],loc0 - ldf.fill.nta f51=[r14],loc0 - ldf.fill.nta f59=[r15],loc0 - ;; - ldf.fill.nta f67=[in0],loc0 - ldf.fill.nta f75=[ r3],loc0 - ldf.fill.nta f83=[r14],loc0 - ldf.fill.nta f91=[r15],loc0 - ;; - ldf.fill.nta f99=[in0],loc1 - ldf.fill.nta f107=[ r3],loc1 - ldf.fill.nta f115=[r14],loc1 - ldf.fill.nta f123=[r15],loc1 - ;; - ldf.fill.nta f36=[in0],loc0 - ldf.fill.nta f44=[ r3],loc0 - ldf.fill.nta f52=[r14],loc0 - ldf.fill.nta 
f60=[r15],loc0 - ;; - ldf.fill.nta f68=[in0],loc0 - ldf.fill.nta f76=[ r3],loc0 - ldf.fill.nta f84=[r14],loc0 - ldf.fill.nta f92=[r15],loc0 - ;; - ldf.fill.nta f100=[in0],loc1 - ldf.fill.nta f108=[ r3],loc1 - ldf.fill.nta f116=[r14],loc1 - ldf.fill.nta f124=[r15],loc1 - ;; - ldf.fill.nta f37=[in0],loc0 - ldf.fill.nta f45=[ r3],loc0 - ldf.fill.nta f53=[r14],loc0 - ldf.fill.nta f61=[r15],loc0 - ;; - ldf.fill.nta f69=[in0],loc0 - ldf.fill.nta f77=[ r3],loc0 - ldf.fill.nta f85=[r14],loc0 - ldf.fill.nta f93=[r15],loc0 - ;; - ldf.fill.nta f101=[in0],loc1 - ldf.fill.nta f109=[ r3],loc1 - ldf.fill.nta f117=[r14],loc1 - ldf.fill.nta f125=[r15],loc1 - ;; - ldf.fill.nta f38 =[in0],loc0 - ldf.fill.nta f46 =[ r3],loc0 - ldf.fill.nta f54 =[r14],loc0 - ldf.fill.nta f62 =[r15],loc0 - ;; - ldf.fill.nta f70 =[in0],loc0 - ldf.fill.nta f78 =[ r3],loc0 - ldf.fill.nta f86 =[r14],loc0 - ldf.fill.nta f94 =[r15],loc0 - ;; - ldf.fill.nta f102=[in0],loc1 - ldf.fill.nta f110=[ r3],loc1 - ldf.fill.nta f118=[r14],loc1 - ldf.fill.nta f126=[r15],loc1 - ;; - ldf.fill.nta f39 =[in0],loc0 - ldf.fill.nta f47 =[ r3],loc0 - ldf.fill.nta f55 =[r14],loc0 - ldf.fill.nta f63 =[r15],loc0 - ;; - ldf.fill.nta f71 =[in0],loc0 - ldf.fill.nta f79 =[ r3],loc0 - ldf.fill.nta f87 =[r14],loc0 - ldf.fill.nta f95 =[r15],loc0 - ;; - ldf.fill.nta f103=[in0] - ldf.fill.nta f111=[ r3] - ldf.fill.nta f119=[r14] - ldf.fill.nta f127=[r15] - br.ret.sptk.many rp -END(__ia64_load_fpu) - -GLOBAL_ENTRY(__ia64_init_fpu) - stf.spill [sp]=f0 // M3 - mov f32=f0 // F - nop.b 0 - - ldfps f33,f34=[sp] // M0 - ldfps f35,f36=[sp] // M1 - mov f37=f0 // F - ;; - - setf.s f38=r0 // M2 - setf.s f39=r0 // M3 - mov f40=f0 // F - - ldfps f41,f42=[sp] // M0 - ldfps f43,f44=[sp] // M1 - mov f45=f0 // F - - setf.s f46=r0 // M2 - setf.s f47=r0 // M3 - mov f48=f0 // F - - ldfps f49,f50=[sp] // M0 - ldfps f51,f52=[sp] // M1 - mov f53=f0 // F - - setf.s f54=r0 // M2 - setf.s f55=r0 // M3 - mov f56=f0 // F - - ldfps f57,f58=[sp] // M0 - ldfps 
f59,f60=[sp] // M1 - mov f61=f0 // F - - setf.s f62=r0 // M2 - setf.s f63=r0 // M3 - mov f64=f0 // F - - ldfps f65,f66=[sp] // M0 - ldfps f67,f68=[sp] // M1 - mov f69=f0 // F - - setf.s f70=r0 // M2 - setf.s f71=r0 // M3 - mov f72=f0 // F - - ldfps f73,f74=[sp] // M0 - ldfps f75,f76=[sp] // M1 - mov f77=f0 // F - - setf.s f78=r0 // M2 - setf.s f79=r0 // M3 - mov f80=f0 // F - - ldfps f81,f82=[sp] // M0 - ldfps f83,f84=[sp] // M1 - mov f85=f0 // F - - setf.s f86=r0 // M2 - setf.s f87=r0 // M3 - mov f88=f0 // F - - /* - * When the instructions are cached, it would be faster to initialize - * the remaining registers with simply mov instructions (F-unit). - * This gets the time down to ~29 cycles. However, this would use up - * 33 bundles, whereas continuing with the above pattern yields - * 10 bundles and ~30 cycles. - */ - - ldfps f89,f90=[sp] // M0 - ldfps f91,f92=[sp] // M1 - mov f93=f0 // F - - setf.s f94=r0 // M2 - setf.s f95=r0 // M3 - mov f96=f0 // F - - ldfps f97,f98=[sp] // M0 - ldfps f99,f100=[sp] // M1 - mov f101=f0 // F - - setf.s f102=r0 // M2 - setf.s f103=r0 // M3 - mov f104=f0 // F - - ldfps f105,f106=[sp] // M0 - ldfps f107,f108=[sp] // M1 - mov f109=f0 // F - - setf.s f110=r0 // M2 - setf.s f111=r0 // M3 - mov f112=f0 // F - - ldfps f113,f114=[sp] // M0 - ldfps f115,f116=[sp] // M1 - mov f117=f0 // F - - setf.s f118=r0 // M2 - setf.s f119=r0 // M3 - mov f120=f0 // F - - ldfps f121,f122=[sp] // M0 - ldfps f123,f124=[sp] // M1 - mov f125=f0 // F - - setf.s f126=r0 // M2 - setf.s f127=r0 // M3 - br.ret.sptk.many rp // F -END(__ia64_init_fpu) - -/* - * Switch execution mode from virtual to physical - * - * Inputs: - * r16 = new psr to establish - * Output: - * r19 = old virtual address of ar.bsp - * r20 = old virtual address of sp - * - * Note: RSE must already be in enforced lazy mode - */ -GLOBAL_ENTRY(ia64_switch_mode_phys) - { - rsm psr.i | psr.ic // disable interrupts and interrupt collection - mov r15=ip - } - ;; - { - flushrs // must be first insn 
in group - srlz.i - } - ;; - mov cr.ipsr=r16 // set new PSR - add r3=1f-ia64_switch_mode_phys,r15 - - mov r19=ar.bsp - mov r20=sp - mov r14=rp // get return address into a general register - ;; - - // going to physical mode, use tpa to translate virt->phys - tpa r17=r19 - tpa r3=r3 - tpa sp=sp - tpa r14=r14 - ;; - - mov r18=ar.rnat // save ar.rnat - mov ar.bspstore=r17 // this steps on ar.rnat - mov cr.iip=r3 - mov cr.ifs=r0 - ;; - mov ar.rnat=r18 // restore ar.rnat - rfi // must be last insn in group - ;; -1: mov rp=r14 - br.ret.sptk.many rp -END(ia64_switch_mode_phys) - -/* - * Switch execution mode from physical to virtual - * - * Inputs: - * r16 = new psr to establish - * r19 = new bspstore to establish - * r20 = new sp to establish - * - * Note: RSE must already be in enforced lazy mode - */ -GLOBAL_ENTRY(ia64_switch_mode_virt) - { - rsm psr.i | psr.ic // disable interrupts and interrupt collection - mov r15=ip - } - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - mov cr.ipsr=r16 // set new PSR - add r3=1f-ia64_switch_mode_virt,r15 - - mov r14=rp // get return address into a general register - ;; - - // going to virtual - // - for code addresses, set upper bits of addr to KERNEL_START - // - for stack addresses, copy from input argument - movl r18=KERNEL_START - dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT - dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT - mov sp=r20 - ;; - or r3=r3,r18 - or r14=r14,r18 - ;; - - mov r18=ar.rnat // save ar.rnat - mov ar.bspstore=r19 // this steps on ar.rnat - mov cr.iip=r3 - mov cr.ifs=r0 - ;; - mov ar.rnat=r18 // restore ar.rnat - rfi // must be last insn in group - ;; -1: mov rp=r14 - br.ret.sptk.many rp -END(ia64_switch_mode_virt) - -GLOBAL_ENTRY(ia64_delay_loop) - .prologue -{ nop 0 // work around GAS unwind info generation bug... 
- .save ar.lc,r2 - mov r2=ar.lc - .body - ;; - mov ar.lc=r32 -} - ;; - // force loop to be 32-byte aligned (GAS bug means we cannot use .align - // inside function body without corrupting unwind info). -{ nop 0 } -1: br.cloop.sptk.few 1b - ;; - mov ar.lc=r2 - br.ret.sptk.many rp -END(ia64_delay_loop) - -/* - * Return a CPU-local timestamp in nano-seconds. This timestamp is - * NOT synchronized across CPUs its return value must never be - * compared against the values returned on another CPU. The usage in - * kernel/sched/core.c ensures that. - * - * The return-value of sched_clock() is NOT supposed to wrap-around. - * If it did, it would cause some scheduling hiccups (at the worst). - * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even - * that would happen only once every 5+ years. - * - * The code below basically calculates: - * - * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT - * - * except that the multiplication and the shift are done with 128-bit - * intermediate precision so that we can produce a full 64-bit result. - */ -GLOBAL_ENTRY(ia64_native_sched_clock) - addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 - mov.m r9=ar.itc // fetch cycle-counter (35 cyc) - ;; - ldf8 f8=[r8] - ;; - setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8... 
- ;; - xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) - xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product - ;; - getf.sig r8=f10 // (5 cyc) - getf.sig r9=f11 - ;; - shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT - br.ret.sptk.many rp -END(ia64_native_sched_clock) - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -GLOBAL_ENTRY(cycle_to_nsec) - alloc r16=ar.pfs,1,0,0,0 - addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 - ;; - ldf8 f8=[r8] - ;; - setf.sig f9=r32 - ;; - xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) - xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product - ;; - getf.sig r8=f10 // (5 cyc) - getf.sig r9=f11 - ;; - shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT - br.ret.sptk.many rp -END(cycle_to_nsec) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - -#ifdef CONFIG_IA64_BRL_EMU - -/* - * Assembly routines used by brl_emu.c to set preserved register state. - */ - -#define SET_REG(reg) \ - GLOBAL_ENTRY(ia64_set_##reg); \ - alloc r16=ar.pfs,1,0,0,0; \ - mov reg=r32; \ - ;; \ - br.ret.sptk.many rp; \ - END(ia64_set_##reg) - -SET_REG(b1); -SET_REG(b2); -SET_REG(b3); -SET_REG(b4); -SET_REG(b5); - -#endif /* CONFIG_IA64_BRL_EMU */ - -#ifdef CONFIG_SMP - -#ifdef CONFIG_HOTPLUG_CPU -GLOBAL_ENTRY(ia64_jump_to_sal) - alloc r16=ar.pfs,1,0,0,0;; - rsm psr.i | psr.ic -{ - flushrs - srlz.i -} - tpa r25=in0 - movl r18=tlb_purge_done;; - DATA_VA_TO_PA(r18);; - mov b1=r18 // Return location - movl r18=ia64_do_tlb_purge;; - DATA_VA_TO_PA(r18);; - mov b2=r18 // doing tlb_flush work - mov ar.rsc=0 // Put RSE in enforced lazy, LE mode - movl r17=1f;; - DATA_VA_TO_PA(r17);; - mov cr.iip=r17 - movl r16=SAL_PSR_BITS_TO_SET;; - mov cr.ipsr=r16 - mov cr.ifs=r0;; - rfi;; // note: this unmask MCA/INIT (psr.mc) -1: - /* - * Invalidate all TLB data/inst - */ - br.sptk.many b2;; // jump to tlb purge code - -tlb_purge_done: - RESTORE_REGION_REGS(r25, r17,r18,r19);; - RESTORE_REG(b0, r25, r17);; - RESTORE_REG(b1, r25, 
r17);; - RESTORE_REG(b2, r25, r17);; - RESTORE_REG(b3, r25, r17);; - RESTORE_REG(b4, r25, r17);; - RESTORE_REG(b5, r25, r17);; - ld8 r1=[r25],0x08;; - ld8 r12=[r25],0x08;; - ld8 r13=[r25],0x08;; - RESTORE_REG(ar.fpsr, r25, r17);; - RESTORE_REG(ar.pfs, r25, r17);; - RESTORE_REG(ar.rnat, r25, r17);; - RESTORE_REG(ar.unat, r25, r17);; - RESTORE_REG(ar.bspstore, r25, r17);; - RESTORE_REG(cr.dcr, r25, r17);; - RESTORE_REG(cr.iva, r25, r17);; - RESTORE_REG(cr.pta, r25, r17);; - srlz.d;; // required not to violate RAW dependency - RESTORE_REG(cr.itv, r25, r17);; - RESTORE_REG(cr.pmv, r25, r17);; - RESTORE_REG(cr.cmcv, r25, r17);; - RESTORE_REG(cr.lrr0, r25, r17);; - RESTORE_REG(cr.lrr1, r25, r17);; - ld8 r4=[r25],0x08;; - ld8 r5=[r25],0x08;; - ld8 r6=[r25],0x08;; - ld8 r7=[r25],0x08;; - ld8 r17=[r25],0x08;; - mov pr=r17,-1;; - RESTORE_REG(ar.lc, r25, r17);; - /* - * Now Restore floating point regs - */ - ldf.fill.nta f2=[r25],16;; - ldf.fill.nta f3=[r25],16;; - ldf.fill.nta f4=[r25],16;; - ldf.fill.nta f5=[r25],16;; - ldf.fill.nta f16=[r25],16;; - ldf.fill.nta f17=[r25],16;; - ldf.fill.nta f18=[r25],16;; - ldf.fill.nta f19=[r25],16;; - ldf.fill.nta f20=[r25],16;; - ldf.fill.nta f21=[r25],16;; - ldf.fill.nta f22=[r25],16;; - ldf.fill.nta f23=[r25],16;; - ldf.fill.nta f24=[r25],16;; - ldf.fill.nta f25=[r25],16;; - ldf.fill.nta f26=[r25],16;; - ldf.fill.nta f27=[r25],16;; - ldf.fill.nta f28=[r25],16;; - ldf.fill.nta f29=[r25],16;; - ldf.fill.nta f30=[r25],16;; - ldf.fill.nta f31=[r25],16;; - - /* - * Now that we have done all the register restores - * we are now ready for the big DIVE to SAL Land - */ - ssm psr.ic;; - srlz.d;; - br.ret.sptk.many b0;; -END(ia64_jump_to_sal) -#endif /* CONFIG_HOTPLUG_CPU */ - -#endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c deleted file mode 100644 index 99300850abc1..000000000000 --- a/arch/ia64/kernel/iosapic.c +++ /dev/null @@ -1,1137 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * I/O SAPIC 
support. - * - * Copyright (C) 1999 Intel Corp. - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 2000-2002 J.I. Lee - * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co. - * David Mosberger-Tang - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999,2000 Walt Drummond - * - * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O - * APIC code. In particular, we now have separate - * handlers for edge and level triggered - * interrupts. - * 00/10/27 Asit Mallick, Goutham Rao IRQ vector - * allocation PCI to vector mapping, shared PCI - * interrupts. - * 00/10/27 D. Mosberger Document things a bit more to make them more - * understandable. Clean up much of the old - * IOSAPIC cruft. - * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts - * and fixes for ACPI S5(SoftOff) support. - * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT - * 02/01/07 E. Focht Redirectable interrupt - * vectors in iosapic_set_affinity(), - * initializations for /proc/irq/#/smp_affinity - * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. - * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq - * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to - * IOSAPIC mapping error - * 02/07/29 T. Kochi Allocate interrupt vectors dynamically - * 02/08/04 T. Kochi Cleaned up terminology (irq, global system - * interrupt, vector, etc.) - * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's - * pci_irq code. - * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. - * Remove iosapic_address & gsi_base from - * external interfaces. Rationalize - * __init/__devinit attributes. - * 04/12/04 Ashok Raj Intel Corporation 2004 - * Updated to work with irq migration necessary - * for CPU Hotplug - */ -/* - * Here is what the interrupt logic between a PCI device and the kernel looks - * like: - * - * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, - * INTD). 
The device is uniquely identified by its bus-, and slot-number - * (the function number does not matter here because all functions share - * the same interrupt lines). - * - * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC - * controller. Multiple interrupt lines may have to share the same - * IOSAPIC pin (if they're level triggered and use the same polarity). - * Each interrupt line has a unique Global System Interrupt (GSI) number - * which can be calculated as the sum of the controller's base GSI number - * and the IOSAPIC pin number to which the line connects. - * - * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the - * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then - * sent to the CPU. - * - * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is - * used as architecture-independent interrupt handling mechanism in Linux. - * As an IRQ is a number, we have to have - * IA-64 interrupt vector number <-> IRQ number mapping. On smaller - * systems, we use one-to-one mapping between IA-64 vector and IRQ. - * - * To sum up, there are three levels of mappings involved: - * - * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ - * - * Note: The term "IRQ" is loosely used everywhere in Linux kernel to - * describe interrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ - * (isa_irq) is the only exception in this source code. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#undef DEBUG_INTERRUPT_ROUTING - -#ifdef DEBUG_INTERRUPT_ROUTING -#define DBG(fmt...) printk(fmt) -#else -#define DBG(fmt...) -#endif - -static DEFINE_SPINLOCK(iosapic_lock); - -/* - * These tables map IA-64 vectors to the IOSAPIC pin that generates this - * vector. 
- */ - -#define NO_REF_RTE 0 - -static struct iosapic { - char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* GSI base */ - unsigned short num_rte; /* # of RTEs on this IOSAPIC */ - int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ -#ifdef CONFIG_NUMA - unsigned short node; /* numa node association via pxm */ -#endif - spinlock_t lock; /* lock for indirect reg access */ -} iosapic_lists[NR_IOSAPICS]; - -struct iosapic_rte_info { - struct list_head rte_list; /* RTEs sharing the same vector */ - char rte_index; /* IOSAPIC RTE index */ - int refcnt; /* reference counter */ - struct iosapic *iosapic; -} ____cacheline_aligned; - -static struct iosapic_intr_info { - struct list_head rtes; /* RTEs using this vector (empty => - * not an IOSAPIC interrupt) */ - int count; /* # of registered RTEs */ - u32 low32; /* current value of low word of - * Redirection table entry */ - unsigned int dest; /* destination CPU physical ID */ - unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ - unsigned char polarity: 1; /* interrupt polarity - * (see iosapic.h) */ - unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ -} iosapic_intr_info[NR_IRQS]; - -static unsigned char pcat_compat; /* 8259 compatibility flag */ - -static inline void -iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val) -{ - unsigned long flags; - - spin_lock_irqsave(&iosapic->lock, flags); - __iosapic_write(iosapic->addr, reg, val); - spin_unlock_irqrestore(&iosapic->lock, flags); -} - -/* - * Find an IOSAPIC associated with a GSI - */ -static inline int -find_iosapic (unsigned int gsi) -{ - int i; - - for (i = 0; i < NR_IOSAPICS; i++) { - if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < - iosapic_lists[i].num_rte) - return i; - } - - return -1; -} - -static inline int __gsi_to_irq(unsigned int gsi) -{ - int irq; - struct iosapic_intr_info *info; - struct iosapic_rte_info *rte; - - for (irq = 0; irq < NR_IRQS; irq++) { - info = &iosapic_intr_info[irq]; 
- list_for_each_entry(rte, &info->rtes, rte_list) - if (rte->iosapic->gsi_base + rte->rte_index == gsi) - return irq; - } - return -1; -} - -int -gsi_to_irq (unsigned int gsi) -{ - unsigned long flags; - int irq; - - spin_lock_irqsave(&iosapic_lock, flags); - irq = __gsi_to_irq(gsi); - spin_unlock_irqrestore(&iosapic_lock, flags); - return irq; -} - -static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi) -{ - struct iosapic_rte_info *rte; - - list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) - if (rte->iosapic->gsi_base + rte->rte_index == gsi) - return rte; - return NULL; -} - -static void -set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask) -{ - unsigned long pol, trigger, dmode; - u32 low32, high32; - int rte_index; - char redir; - struct iosapic_rte_info *rte; - ia64_vector vector = irq_to_vector(irq); - - DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); - - rte = find_rte(irq, gsi); - if (!rte) - return; /* not an IOSAPIC interrupt */ - - rte_index = rte->rte_index; - pol = iosapic_intr_info[irq].polarity; - trigger = iosapic_intr_info[irq].trigger; - dmode = iosapic_intr_info[irq].dmode; - - redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; - -#ifdef CONFIG_SMP - set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); -#endif - - low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | - (trigger << IOSAPIC_TRIGGER_SHIFT) | - (dmode << IOSAPIC_DELIVERY_SHIFT) | - ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) | - vector); - - /* dest contains both id and eid */ - high32 = (dest << IOSAPIC_DEST_SHIFT); - - iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); - iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); - iosapic_intr_info[irq].low32 = low32; - iosapic_intr_info[irq].dest = dest; -} - -static void -iosapic_nop (struct irq_data *data) -{ - /* do nothing... 
*/ -} - - -#ifdef CONFIG_KEXEC -void -kexec_disable_iosapic(void) -{ - struct iosapic_intr_info *info; - struct iosapic_rte_info *rte; - ia64_vector vec; - int irq; - - for (irq = 0; irq < NR_IRQS; irq++) { - info = &iosapic_intr_info[irq]; - vec = irq_to_vector(irq); - list_for_each_entry(rte, &info->rtes, - rte_list) { - iosapic_write(rte->iosapic, - IOSAPIC_RTE_LOW(rte->rte_index), - IOSAPIC_MASK|vec); - iosapic_eoi(rte->iosapic->addr, vec); - } - } -} -#endif - -static void -mask_irq (struct irq_data *data) -{ - unsigned int irq = data->irq; - u32 low32; - int rte_index; - struct iosapic_rte_info *rte; - - if (!iosapic_intr_info[irq].count) - return; /* not an IOSAPIC interrupt! */ - - /* set only the mask bit */ - low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { - rte_index = rte->rte_index; - iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); - } -} - -static void -unmask_irq (struct irq_data *data) -{ - unsigned int irq = data->irq; - u32 low32; - int rte_index; - struct iosapic_rte_info *rte; - - if (!iosapic_intr_info[irq].count) - return; /* not an IOSAPIC interrupt! */ - - low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK; - list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { - rte_index = rte->rte_index; - iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); - } -} - - -static int -iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ -#ifdef CONFIG_SMP - unsigned int irq = data->irq; - u32 high32, low32; - int cpu, dest, rte_index; - int redir = (irq & IA64_IRQ_REDIRECTED) ? 
1 : 0; - struct iosapic_rte_info *rte; - struct iosapic *iosapic; - - irq &= (~IA64_IRQ_REDIRECTED); - - cpu = cpumask_first_and(cpu_online_mask, mask); - if (cpu >= nr_cpu_ids) - return -1; - - if (irq_prepare_move(irq, cpu)) - return -1; - - dest = cpu_physical_id(cpu); - - if (!iosapic_intr_info[irq].count) - return -1; /* not an IOSAPIC interrupt */ - - set_irq_affinity_info(irq, dest, redir); - - /* dest contains both id and eid */ - high32 = dest << IOSAPIC_DEST_SHIFT; - - low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); - if (redir) - /* change delivery mode to lowest priority */ - low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); - else - /* change delivery mode to fixed */ - low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); - low32 &= IOSAPIC_VECTOR_MASK; - low32 |= irq_to_vector(irq); - - iosapic_intr_info[irq].low32 = low32; - iosapic_intr_info[irq].dest = dest; - list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { - iosapic = rte->iosapic; - rte_index = rte->rte_index; - iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); - iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); - } - -#endif - return 0; -} - -/* - * Handlers for level-triggered interrupts. 
- */ - -static unsigned int -iosapic_startup_level_irq (struct irq_data *data) -{ - unmask_irq(data); - return 0; -} - -static void -iosapic_unmask_level_irq (struct irq_data *data) -{ - unsigned int irq = data->irq; - ia64_vector vec = irq_to_vector(irq); - struct iosapic_rte_info *rte; - int do_unmask_irq = 0; - - irq_complete_move(irq); - if (unlikely(irqd_is_setaffinity_pending(data))) { - do_unmask_irq = 1; - mask_irq(data); - } else - unmask_irq(data); - - list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) - iosapic_eoi(rte->iosapic->addr, vec); - - if (unlikely(do_unmask_irq)) { - irq_move_masked_irq(data); - unmask_irq(data); - } -} - -#define iosapic_shutdown_level_irq mask_irq -#define iosapic_enable_level_irq unmask_irq -#define iosapic_disable_level_irq mask_irq -#define iosapic_ack_level_irq iosapic_nop - -static struct irq_chip irq_type_iosapic_level = { - .name = "IO-SAPIC-level", - .irq_startup = iosapic_startup_level_irq, - .irq_shutdown = iosapic_shutdown_level_irq, - .irq_enable = iosapic_enable_level_irq, - .irq_disable = iosapic_disable_level_irq, - .irq_ack = iosapic_ack_level_irq, - .irq_mask = mask_irq, - .irq_unmask = iosapic_unmask_level_irq, - .irq_set_affinity = iosapic_set_affinity -}; - -/* - * Handlers for edge-triggered interrupts. - */ - -static unsigned int -iosapic_startup_edge_irq (struct irq_data *data) -{ - unmask_irq(data); - /* - * IOSAPIC simply drops interrupts pended while the - * corresponding pin was masked, so we can't know if an - * interrupt is pending already. Let's hope not... 
- */ - return 0; -} - -static void -iosapic_ack_edge_irq (struct irq_data *data) -{ - irq_complete_move(data->irq); - irq_move_irq(data); -} - -#define iosapic_enable_edge_irq unmask_irq -#define iosapic_disable_edge_irq iosapic_nop - -static struct irq_chip irq_type_iosapic_edge = { - .name = "IO-SAPIC-edge", - .irq_startup = iosapic_startup_edge_irq, - .irq_shutdown = iosapic_disable_edge_irq, - .irq_enable = iosapic_enable_edge_irq, - .irq_disable = iosapic_disable_edge_irq, - .irq_ack = iosapic_ack_edge_irq, - .irq_mask = mask_irq, - .irq_unmask = unmask_irq, - .irq_set_affinity = iosapic_set_affinity -}; - -static unsigned int -iosapic_version (char __iomem *addr) -{ - /* - * IOSAPIC Version Register return 32 bit structure like: - * { - * unsigned int version : 8; - * unsigned int reserved1 : 8; - * unsigned int max_redir : 8; - * unsigned int reserved2 : 8; - * } - */ - return __iosapic_read(addr, IOSAPIC_VERSION); -} - -static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol) -{ - int i, irq = -ENOSPC, min_count = -1; - struct iosapic_intr_info *info; - - /* - * shared vectors for edge-triggered interrupts are not - * supported yet - */ - if (trigger == IOSAPIC_EDGE) - return -EINVAL; - - for (i = 0; i < NR_IRQS; i++) { - info = &iosapic_intr_info[i]; - if (info->trigger == trigger && info->polarity == pol && - (info->dmode == IOSAPIC_FIXED || - info->dmode == IOSAPIC_LOWEST_PRIORITY) && - can_request_irq(i, IRQF_SHARED)) { - if (min_count == -1 || info->count < min_count) { - irq = i; - min_count = info->count; - } - } - } - return irq; -} - -/* - * if the given vector is already owned by other, - * assign a new vector for the other and make the vector available - */ -static void __init -iosapic_reassign_vector (int irq) -{ - int new_irq; - - if (iosapic_intr_info[irq].count) { - new_irq = create_irq(); - if (new_irq < 0) - panic("%s: out of interrupt vectors!\n", __func__); - printk(KERN_INFO "Reassigning vector %d to %d\n", - 
irq_to_vector(irq), irq_to_vector(new_irq)); - memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq], - sizeof(struct iosapic_intr_info)); - INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes); - list_move(iosapic_intr_info[irq].rtes.next, - &iosapic_intr_info[new_irq].rtes); - memset(&iosapic_intr_info[irq], 0, - sizeof(struct iosapic_intr_info)); - iosapic_intr_info[irq].low32 = IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); - } -} - -static inline int irq_is_shared (int irq) -{ - return (iosapic_intr_info[irq].count > 1); -} - -struct irq_chip* -ia64_native_iosapic_get_irq_chip(unsigned long trigger) -{ - if (trigger == IOSAPIC_EDGE) - return &irq_type_iosapic_edge; - else - return &irq_type_iosapic_level; -} - -static int -register_intr (unsigned int gsi, int irq, unsigned char delivery, - unsigned long polarity, unsigned long trigger) -{ - struct irq_chip *chip, *irq_type; - int index; - struct iosapic_rte_info *rte; - - index = find_iosapic(gsi); - if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", - __func__, gsi); - return -ENODEV; - } - - rte = find_rte(irq, gsi); - if (!rte) { - rte = kzalloc(sizeof (*rte), GFP_ATOMIC); - if (!rte) { - printk(KERN_WARNING "%s: cannot allocate memory\n", - __func__); - return -ENOMEM; - } - - rte->iosapic = &iosapic_lists[index]; - rte->rte_index = gsi - rte->iosapic->gsi_base; - rte->refcnt++; - list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes); - iosapic_intr_info[irq].count++; - iosapic_lists[index].rtes_inuse++; - } - else if (rte->refcnt == NO_REF_RTE) { - struct iosapic_intr_info *info = &iosapic_intr_info[irq]; - if (info->count > 0 && - (info->trigger != trigger || info->polarity != polarity)){ - printk (KERN_WARNING - "%s: cannot override the interrupt\n", - __func__); - return -EINVAL; - } - rte->refcnt++; - iosapic_intr_info[irq].count++; - iosapic_lists[index].rtes_inuse++; - } - - iosapic_intr_info[irq].polarity = polarity; - iosapic_intr_info[irq].dmode = 
delivery; - iosapic_intr_info[irq].trigger = trigger; - - irq_type = iosapic_get_irq_chip(trigger); - - chip = irq_get_chip(irq); - if (irq_type != NULL && chip != irq_type) { - if (chip != &no_irq_chip) - printk(KERN_WARNING - "%s: changing vector %d from %s to %s\n", - __func__, irq_to_vector(irq), - chip->name, irq_type->name); - chip = irq_type; - } - irq_set_chip_handler_name_locked(irq_get_irq_data(irq), chip, - trigger == IOSAPIC_EDGE ? handle_edge_irq : handle_level_irq, - NULL); - return 0; -} - -static unsigned int -get_target_cpu (unsigned int gsi, int irq) -{ -#ifdef CONFIG_SMP - static int cpu = -1; - extern int cpe_vector; - cpumask_t domain = irq_to_domain(irq); - - /* - * In case of vector shared by multiple RTEs, all RTEs that - * share the vector need to use the same destination CPU. - */ - if (iosapic_intr_info[irq].count) - return iosapic_intr_info[irq].dest; - - /* - * If the platform supports redirection via XTP, let it - * distribute interrupts. - */ - if (smp_int_redirect & SMP_IRQ_REDIRECTION) - return cpu_physical_id(smp_processor_id()); - - /* - * Some interrupts (ACPI SCI, for instance) are registered - * before the BSP is marked as online. 
- */ - if (!cpu_online(smp_processor_id())) - return cpu_physical_id(smp_processor_id()); - - if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR) - return get_cpei_target_cpu(); - -#ifdef CONFIG_NUMA - { - int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; - const struct cpumask *cpu_mask; - - iosapic_index = find_iosapic(gsi); - if (iosapic_index < 0 || - iosapic_lists[iosapic_index].node == MAX_NUMNODES) - goto skip_numa_setup; - - cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node); - num_cpus = 0; - for_each_cpu_and(numa_cpu, cpu_mask, &domain) { - if (cpu_online(numa_cpu)) - num_cpus++; - } - - if (!num_cpus) - goto skip_numa_setup; - - /* Use irq assignment to distribute across cpus in node */ - cpu_index = irq % num_cpus; - - for_each_cpu_and(numa_cpu, cpu_mask, &domain) - if (cpu_online(numa_cpu) && i++ >= cpu_index) - break; - - if (numa_cpu < nr_cpu_ids) - return cpu_physical_id(numa_cpu); - } -skip_numa_setup: -#endif - /* - * Otherwise, round-robin interrupt vectors across all the - * processors. (It'd be nice if we could be smarter in the - * case of NUMA.) - */ - do { - if (++cpu >= nr_cpu_ids) - cpu = 0; - } while (!cpu_online(cpu) || !cpumask_test_cpu(cpu, &domain)); - - return cpu_physical_id(cpu); -#else /* CONFIG_SMP */ - return cpu_physical_id(smp_processor_id()); -#endif -} - -static inline unsigned char choose_dmode(void) -{ -#ifdef CONFIG_SMP - if (smp_int_redirect & SMP_IRQ_REDIRECTION) - return IOSAPIC_LOWEST_PRIORITY; -#endif - return IOSAPIC_FIXED; -} - -/* - * ACPI can describe IOSAPIC interrupts via static tables and namespace - * methods. This provides an interface to register those interrupts and - * program the IOSAPIC RTE. 
- */ -int -iosapic_register_intr (unsigned int gsi, - unsigned long polarity, unsigned long trigger) -{ - int irq, mask = 1, err; - unsigned int dest; - unsigned long flags; - struct iosapic_rte_info *rte; - u32 low32; - unsigned char dmode; - struct irq_desc *desc; - - /* - * If this GSI has already been registered (i.e., it's a - * shared interrupt, or we lost a race to register it), - * don't touch the RTE. - */ - spin_lock_irqsave(&iosapic_lock, flags); - irq = __gsi_to_irq(gsi); - if (irq > 0) { - rte = find_rte(irq, gsi); - if(iosapic_intr_info[irq].count == 0) { - assign_irq_vector(irq); - irq_init_desc(irq); - } else if (rte->refcnt != NO_REF_RTE) { - rte->refcnt++; - goto unlock_iosapic_lock; - } - } else - irq = create_irq(); - - /* If vector is running out, we try to find a sharable vector */ - if (irq < 0) { - irq = iosapic_find_sharable_irq(trigger, polarity); - if (irq < 0) - goto unlock_iosapic_lock; - } - - desc = irq_to_desc(irq); - raw_spin_lock(&desc->lock); - dest = get_target_cpu(gsi, irq); - dmode = choose_dmode(); - err = register_intr(gsi, irq, dmode, polarity, trigger); - if (err < 0) { - raw_spin_unlock(&desc->lock); - irq = err; - goto unlock_iosapic_lock; - } - - /* - * If the vector is shared and already unmasked for other - * interrupt sources, don't mask it. - */ - low32 = iosapic_intr_info[irq].low32; - if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK)) - mask = 0; - set_rte(gsi, irq, dest, mask); - - printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", - gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? 
"high" : "low"), - cpu_logical_id(dest), dest, irq_to_vector(irq)); - - raw_spin_unlock(&desc->lock); - unlock_iosapic_lock: - spin_unlock_irqrestore(&iosapic_lock, flags); - return irq; -} - -void -iosapic_unregister_intr (unsigned int gsi) -{ - unsigned long flags; - int irq, index; - u32 low32; - unsigned long trigger, polarity; - unsigned int dest; - struct iosapic_rte_info *rte; - - /* - * If the irq associated with the gsi is not found, - * iosapic_unregister_intr() is unbalanced. We need to check - * this again after getting locks. - */ - irq = gsi_to_irq(gsi); - if (irq < 0) { - printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", - gsi); - WARN_ON(1); - return; - } - - spin_lock_irqsave(&iosapic_lock, flags); - if ((rte = find_rte(irq, gsi)) == NULL) { - printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", - gsi); - WARN_ON(1); - goto out; - } - - if (--rte->refcnt > 0) - goto out; - - rte->refcnt = NO_REF_RTE; - - /* Mask the interrupt */ - low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK; - iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32); - - iosapic_intr_info[irq].count--; - index = find_iosapic(gsi); - iosapic_lists[index].rtes_inuse--; - WARN_ON(iosapic_lists[index].rtes_inuse < 0); - - trigger = iosapic_intr_info[irq].trigger; - polarity = iosapic_intr_info[irq].polarity; - dest = iosapic_intr_info[irq].dest; - printk(KERN_INFO - "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n", - gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? 
"high" : "low"), - cpu_logical_id(dest), dest, irq_to_vector(irq)); - - if (iosapic_intr_info[irq].count == 0) { -#ifdef CONFIG_SMP - /* Clear affinity */ - irq_data_update_affinity(irq_get_irq_data(irq), cpu_all_mask); -#endif - /* Clear the interrupt information */ - iosapic_intr_info[irq].dest = 0; - iosapic_intr_info[irq].dmode = 0; - iosapic_intr_info[irq].polarity = 0; - iosapic_intr_info[irq].trigger = 0; - iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; - - /* Destroy and reserve IRQ */ - destroy_and_reserve_irq(irq); - } - out: - spin_unlock_irqrestore(&iosapic_lock, flags); -} - -/* - * ACPI calls this when it finds an entry for a platform interrupt. - */ -int __init -iosapic_register_platform_intr (u32 int_type, unsigned int gsi, - int iosapic_vector, u16 eid, u16 id, - unsigned long polarity, unsigned long trigger) -{ - static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"}; - unsigned char delivery; - int irq, vector, mask = 0; - unsigned int dest = ((id << 8) | eid) & 0xffff; - - switch (int_type) { - case ACPI_INTERRUPT_PMI: - irq = vector = iosapic_vector; - bind_irq_vector(irq, vector, CPU_MASK_ALL); - /* - * since PMI vector is alloc'd by FW(ACPI) not by kernel, - * we need to make sure the vector is available - */ - iosapic_reassign_vector(irq); - delivery = IOSAPIC_PMI; - break; - case ACPI_INTERRUPT_INIT: - irq = create_irq(); - if (irq < 0) - panic("%s: out of interrupt vectors!\n", __func__); - vector = irq_to_vector(irq); - delivery = IOSAPIC_INIT; - break; - case ACPI_INTERRUPT_CPEI: - irq = vector = IA64_CPE_VECTOR; - BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); - delivery = IOSAPIC_FIXED; - mask = 1; - break; - default: - printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__, - int_type); - return -1; - } - - register_intr(gsi, irq, delivery, polarity, trigger); - - printk(KERN_INFO - "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)" - " vector %d\n", - int_type < ARRAY_SIZE(name) ? 
name[int_type] : "unknown", - int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - cpu_logical_id(dest), dest, vector); - - set_rte(gsi, irq, dest, mask); - return vector; -} - -/* - * ACPI calls this when it finds an entry for a legacy ISA IRQ override. - */ -void iosapic_override_isa_irq(unsigned int isa_irq, unsigned int gsi, - unsigned long polarity, unsigned long trigger) -{ - int vector, irq; - unsigned int dest = cpu_physical_id(smp_processor_id()); - unsigned char dmode; - - irq = vector = isa_irq_to_vector(isa_irq); - BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); - dmode = choose_dmode(); - register_intr(gsi, irq, dmode, polarity, trigger); - - DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", - isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level", - polarity == IOSAPIC_POL_HIGH ? "high" : "low", - cpu_logical_id(dest), dest, vector); - - set_rte(gsi, irq, dest, 1); -} - -void __init -ia64_native_iosapic_pcat_compat_init(void) -{ - if (pcat_compat) { - /* - * Disable the compatibility mode interrupts (8259 style), - * needs IN/OUT support enabled. 
- */ - printk(KERN_INFO - "%s: Disabling PC-AT compatible 8259 interrupts\n", - __func__); - outb(0xff, 0xA1); - outb(0xff, 0x21); - } -} - -void __init -iosapic_system_init (int system_pcat_compat) -{ - int irq; - - for (irq = 0; irq < NR_IRQS; ++irq) { - iosapic_intr_info[irq].low32 = IOSAPIC_MASK; - /* mark as unused */ - INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); - - iosapic_intr_info[irq].count = 0; - } - - pcat_compat = system_pcat_compat; - if (pcat_compat) - iosapic_pcat_compat_init(); -} - -static inline int -iosapic_alloc (void) -{ - int index; - - for (index = 0; index < NR_IOSAPICS; index++) - if (!iosapic_lists[index].addr) - return index; - - printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__); - return -1; -} - -static inline void -iosapic_free (int index) -{ - memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0])); -} - -static inline int -iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver) -{ - int index; - unsigned int gsi_end, base, end; - - /* check gsi range */ - gsi_end = gsi_base + ((ver >> 16) & 0xff); - for (index = 0; index < NR_IOSAPICS; index++) { - if (!iosapic_lists[index].addr) - continue; - - base = iosapic_lists[index].gsi_base; - end = base + iosapic_lists[index].num_rte - 1; - - if (gsi_end < base || end < gsi_base) - continue; /* OK */ - - return -EBUSY; - } - return 0; -} - -static int -iosapic_delete_rte(unsigned int irq, unsigned int gsi) -{ - struct iosapic_rte_info *rte, *temp; - - list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes, - rte_list) { - if (rte->iosapic->gsi_base + rte->rte_index == gsi) { - if (rte->refcnt) - return -EBUSY; - - list_del(&rte->rte_list); - kfree(rte); - return 0; - } - } - - return -EINVAL; -} - -int iosapic_init(unsigned long phys_addr, unsigned int gsi_base) -{ - int num_rte, err, index; - unsigned int isa_irq, ver; - char __iomem *addr; - unsigned long flags; - - spin_lock_irqsave(&iosapic_lock, flags); - index = find_iosapic(gsi_base); - if 
(index >= 0) { - spin_unlock_irqrestore(&iosapic_lock, flags); - return -EBUSY; - } - - addr = ioremap(phys_addr, 0); - if (addr == NULL) { - spin_unlock_irqrestore(&iosapic_lock, flags); - return -ENOMEM; - } - ver = iosapic_version(addr); - if ((err = iosapic_check_gsi_range(gsi_base, ver))) { - iounmap(addr); - spin_unlock_irqrestore(&iosapic_lock, flags); - return err; - } - - /* - * The MAX_REDIR register holds the highest input pin number - * (starting from 0). We add 1 so that we can use it for - * number of pins (= RTEs) - */ - num_rte = ((ver >> 16) & 0xff) + 1; - - index = iosapic_alloc(); - iosapic_lists[index].addr = addr; - iosapic_lists[index].gsi_base = gsi_base; - iosapic_lists[index].num_rte = num_rte; -#ifdef CONFIG_NUMA - iosapic_lists[index].node = MAX_NUMNODES; -#endif - spin_lock_init(&iosapic_lists[index].lock); - spin_unlock_irqrestore(&iosapic_lock, flags); - - if ((gsi_base == 0) && pcat_compat) { - /* - * Map the legacy ISA devices into the IOSAPIC data. Some of - * these may get reprogrammed later on with data from the ACPI - * Interrupt Source Override table. 
- */ - for (isa_irq = 0; isa_irq < 16; ++isa_irq) - iosapic_override_isa_irq(isa_irq, isa_irq, - IOSAPIC_POL_HIGH, - IOSAPIC_EDGE); - } - return 0; -} - -int iosapic_remove(unsigned int gsi_base) -{ - int i, irq, index, err = 0; - unsigned long flags; - - spin_lock_irqsave(&iosapic_lock, flags); - index = find_iosapic(gsi_base); - if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n", - __func__, gsi_base); - goto out; - } - - if (iosapic_lists[index].rtes_inuse) { - err = -EBUSY; - printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n", - __func__, gsi_base); - goto out; - } - - for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) { - irq = __gsi_to_irq(i); - if (irq < 0) - continue; - - err = iosapic_delete_rte(irq, i); - if (err) - goto out; - } - - iounmap(iosapic_lists[index].addr); - iosapic_free(index); - out: - spin_unlock_irqrestore(&iosapic_lock, flags); - return err; -} - -#ifdef CONFIG_NUMA -void map_iosapic_to_node(unsigned int gsi_base, int node) -{ - int index; - - index = find_iosapic(gsi_base); - if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", - __func__, gsi_base); - return; - } - iosapic_lists[index].node = node; - return; -} -#endif diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c deleted file mode 100644 index 275b9ea58c64..000000000000 --- a/arch/ia64/kernel/irq.c +++ /dev/null @@ -1,181 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/arch/ia64/kernel/irq.c - * - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the code used by various IRQ handling routines: - * asking for different IRQs should be done through these routines - * instead of just grabbing them. Thus setups with different IRQ numbers - * shouldn't result in any weird surprises, and installing new handlers - * should be easier. 
- * - * Copyright (C) Ashok Raj, Intel Corporation 2004 - * - * 4/14/2004: Added code to handle cpu migration and do safe irq - * migration without losing interrupts for iosapic - * architecture. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); -} - -/* - * Interrupt statistics: - */ - -atomic_t irq_err_count; - -/* - * /proc/interrupts printing: - */ -int arch_show_interrupts(struct seq_file *p, int prec) -{ - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); - return 0; -} - -#ifdef CONFIG_SMP -static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; - -void set_irq_affinity_info (unsigned int irq, int hwid, int redir) -{ - if (irq < NR_IRQS) { - irq_data_update_affinity(irq_get_irq_data(irq), - cpumask_of(cpu_logical_id(hwid))); - irq_redir[irq] = (char) (redir & 0xff); - } -} -#endif /* CONFIG_SMP */ - -int __init arch_early_irq_init(void) -{ - ia64_mca_irq_init(); - return 0; -} - -#ifdef CONFIG_HOTPLUG_CPU -unsigned int vectors_in_migration[NR_IRQS]; - -/* - * Since cpu_online_mask is already updated, we just need to check for - * affinity that has zeros - */ -static void migrate_irqs(void) -{ - int irq, new_cpu; - - for (irq=0; irq < NR_IRQS; irq++) { - struct irq_desc *desc = irq_to_desc(irq); - struct irq_data *data = irq_desc_get_irq_data(desc); - struct irq_chip *chip = irq_data_get_irq_chip(data); - - if (irqd_irq_disabled(data)) - continue; - - /* - * No handling for now. - * TBD: Implement a disable function so we can now - * tell CPU not to respond to these local intr sources. - * such as ITV,CPEI,MCA etc. 
- */ - if (irqd_is_per_cpu(data)) - continue; - - if (cpumask_any_and(irq_data_get_affinity_mask(data), - cpu_online_mask) >= nr_cpu_ids) { - /* - * Save it for phase 2 processing - */ - vectors_in_migration[irq] = irq; - - new_cpu = cpumask_any(cpu_online_mask); - - /* - * Al three are essential, currently WARN_ON.. maybe panic? - */ - if (chip && chip->irq_disable && - chip->irq_enable && chip->irq_set_affinity) { - chip->irq_disable(data); - chip->irq_set_affinity(data, - cpumask_of(new_cpu), false); - chip->irq_enable(data); - } else { - WARN_ON((!chip || !chip->irq_disable || - !chip->irq_enable || - !chip->irq_set_affinity)); - } - } - } -} - -void fixup_irqs(void) -{ - unsigned int irq; - extern void ia64_process_pending_intr(void); - extern volatile int time_keeper_id; - - /* Mask ITV to disable timer */ - ia64_set_itv(1 << 16); - - /* - * Find a new timesync master - */ - if (smp_processor_id() == time_keeper_id) { - time_keeper_id = cpumask_first(cpu_online_mask); - printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id); - } - - /* - * Phase 1: Locate IRQs bound to this cpu and - * relocate them for cpu removal. - */ - migrate_irqs(); - - /* - * Phase 2: Perform interrupt processing for all entries reported in - * local APIC. - */ - ia64_process_pending_intr(); - - /* - * Phase 3: Now handle any interrupts not captured in local APIC. - * This is to account for cases that device interrupted during the time the - * rte was being disabled and re-programmed. - */ - for (irq=0; irq < NR_IRQS; irq++) { - if (vectors_in_migration[irq]) { - struct pt_regs *old_regs = set_irq_regs(NULL); - - vectors_in_migration[irq]=0; - generic_handle_irq(irq); - set_irq_regs(old_regs); - } - } - - /* - * Now let processor die. We do irq disable and max_xtp() to - * ensure there is no more interrupts routed to this processor. - * But the local timer interrupt can have 1 pending which we - * take care in timer_interrupt(). 
- */ - max_xtp(); - local_irq_disable(); -} -#endif diff --git a/arch/ia64/kernel/irq.h b/arch/ia64/kernel/irq.h deleted file mode 100644 index 4d16f3cbeb1d..000000000000 --- a/arch/ia64/kernel/irq.h +++ /dev/null @@ -1,3 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -extern void register_percpu_irq(ia64_vector vec, irq_handler_t handler, - unsigned long flags, const char *name); diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c deleted file mode 100644 index 46e33c5cb53d..000000000000 --- a/arch/ia64/kernel/irq_ia64.c +++ /dev/null @@ -1,645 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/arch/ia64/kernel/irq_ia64.c - * - * Copyright (C) 1998-2001 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger-Tang - * - * 6/10/99: Updated to bring in sync with x86 version to facilitate - * support for SMP and different interrupt controllers. - * - * 09/15/00 Goutham Rao Implemented pci_irq_to_vector - * PCI to vector allocation routine. - * 04/14/2004 Ashok Raj - * Added CPU Hotplug handling for IPF. - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#define IRQ_DEBUG 0 - -#define IRQ_VECTOR_UNASSIGNED (0) - -#define IRQ_UNUSED (0) -#define IRQ_USED (1) -#define IRQ_RSVD (2) - -int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR; -int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; - -/* default base addr of IPI table */ -void __iomem *ipi_base_addr = ((void __iomem *) - (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR)); - -static cpumask_t vector_allocation_domain(int cpu); - -/* - * Legacy IRQ to IA-64 vector translation table. 
- */ -__u8 isa_irq_to_vector_map[16] = { - /* 8259 IRQ translation, first 16 entries */ - 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, - 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21 -}; -EXPORT_SYMBOL(isa_irq_to_vector_map); - -DEFINE_SPINLOCK(vector_lock); - -struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { - [0 ... NR_IRQS - 1] = { - .vector = IRQ_VECTOR_UNASSIGNED, - .domain = CPU_MASK_NONE - } -}; - -DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = { - [0 ... IA64_NUM_VECTORS - 1] = -1 -}; - -static cpumask_t vector_table[IA64_NUM_VECTORS] = { - [0 ... IA64_NUM_VECTORS - 1] = CPU_MASK_NONE -}; - -static int irq_status[NR_IRQS] = { - [0 ... NR_IRQS -1] = IRQ_UNUSED -}; - -static inline int find_unassigned_irq(void) -{ - int irq; - - for (irq = IA64_FIRST_DEVICE_VECTOR; irq < NR_IRQS; irq++) - if (irq_status[irq] == IRQ_UNUSED) - return irq; - return -ENOSPC; -} - -static inline int find_unassigned_vector(cpumask_t domain) -{ - cpumask_t mask; - int pos, vector; - - cpumask_and(&mask, &domain, cpu_online_mask); - if (cpumask_empty(&mask)) - return -EINVAL; - - for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) { - vector = IA64_FIRST_DEVICE_VECTOR + pos; - cpumask_and(&mask, &domain, &vector_table[vector]); - if (!cpumask_empty(&mask)) - continue; - return vector; - } - return -ENOSPC; -} - -static int __bind_irq_vector(int irq, int vector, cpumask_t domain) -{ - cpumask_t mask; - int cpu; - struct irq_cfg *cfg = &irq_cfg[irq]; - - BUG_ON((unsigned)irq >= NR_IRQS); - BUG_ON((unsigned)vector >= IA64_NUM_VECTORS); - - cpumask_and(&mask, &domain, cpu_online_mask); - if (cpumask_empty(&mask)) - return -EINVAL; - if ((cfg->vector == vector) && cpumask_equal(&cfg->domain, &domain)) - return 0; - if (cfg->vector != IRQ_VECTOR_UNASSIGNED) - return -EBUSY; - for_each_cpu(cpu, &mask) - per_cpu(vector_irq, cpu)[vector] = irq; - cfg->vector = vector; - cfg->domain = domain; - irq_status[irq] = IRQ_USED; - cpumask_or(&vector_table[vector], &vector_table[vector], 
&domain); - return 0; -} - -int bind_irq_vector(int irq, int vector, cpumask_t domain) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&vector_lock, flags); - ret = __bind_irq_vector(irq, vector, domain); - spin_unlock_irqrestore(&vector_lock, flags); - return ret; -} - -static void __clear_irq_vector(int irq) -{ - int vector, cpu; - cpumask_t domain; - struct irq_cfg *cfg = &irq_cfg[irq]; - - BUG_ON((unsigned)irq >= NR_IRQS); - BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED); - vector = cfg->vector; - domain = cfg->domain; - for_each_cpu_and(cpu, &cfg->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = -1; - cfg->vector = IRQ_VECTOR_UNASSIGNED; - cfg->domain = CPU_MASK_NONE; - irq_status[irq] = IRQ_UNUSED; - cpumask_andnot(&vector_table[vector], &vector_table[vector], &domain); -} - -static void clear_irq_vector(int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); - spin_unlock_irqrestore(&vector_lock, flags); -} - -int -ia64_native_assign_irq_vector (int irq) -{ - unsigned long flags; - int vector, cpu; - cpumask_t domain = CPU_MASK_NONE; - - vector = -ENOSPC; - - spin_lock_irqsave(&vector_lock, flags); - for_each_online_cpu(cpu) { - domain = vector_allocation_domain(cpu); - vector = find_unassigned_vector(domain); - if (vector >= 0) - break; - } - if (vector < 0) - goto out; - if (irq == AUTO_ASSIGN) - irq = vector; - BUG_ON(__bind_irq_vector(irq, vector, domain)); - out: - spin_unlock_irqrestore(&vector_lock, flags); - return vector; -} - -void -ia64_native_free_irq_vector (int vector) -{ - if (vector < IA64_FIRST_DEVICE_VECTOR || - vector > IA64_LAST_DEVICE_VECTOR) - return; - clear_irq_vector(vector); -} - -int -reserve_irq_vector (int vector) -{ - if (vector < IA64_FIRST_DEVICE_VECTOR || - vector > IA64_LAST_DEVICE_VECTOR) - return -EINVAL; - return !!bind_irq_vector(vector, vector, CPU_MASK_ALL); -} - -/* - * Initialize vector_irq on a new cpu. 
This function must be called - * with vector_lock held. - */ -void __setup_vector_irq(int cpu) -{ - int irq, vector; - - /* Clear vector_irq */ - for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) - per_cpu(vector_irq, cpu)[vector] = -1; - /* Mark the inuse vectors */ - for (irq = 0; irq < NR_IRQS; ++irq) { - if (!cpumask_test_cpu(cpu, &irq_cfg[irq].domain)) - continue; - vector = irq_to_vector(irq); - per_cpu(vector_irq, cpu)[vector] = irq; - } -} - -#ifdef CONFIG_SMP - -static enum vector_domain_type { - VECTOR_DOMAIN_NONE, - VECTOR_DOMAIN_PERCPU -} vector_domain_type = VECTOR_DOMAIN_NONE; - -static cpumask_t vector_allocation_domain(int cpu) -{ - if (vector_domain_type == VECTOR_DOMAIN_PERCPU) - return *cpumask_of(cpu); - return CPU_MASK_ALL; -} - -static int __irq_prepare_move(int irq, int cpu) -{ - struct irq_cfg *cfg = &irq_cfg[irq]; - int vector; - cpumask_t domain; - - if (cfg->move_in_progress || cfg->move_cleanup_count) - return -EBUSY; - if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu)) - return -EINVAL; - if (cpumask_test_cpu(cpu, &cfg->domain)) - return 0; - domain = vector_allocation_domain(cpu); - vector = find_unassigned_vector(domain); - if (vector < 0) - return -ENOSPC; - cfg->move_in_progress = 1; - cfg->old_domain = cfg->domain; - cfg->vector = IRQ_VECTOR_UNASSIGNED; - cfg->domain = CPU_MASK_NONE; - BUG_ON(__bind_irq_vector(irq, vector, domain)); - return 0; -} - -int irq_prepare_move(int irq, int cpu) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&vector_lock, flags); - ret = __irq_prepare_move(irq, cpu); - spin_unlock_irqrestore(&vector_lock, flags); - return ret; -} - -void irq_complete_move(unsigned irq) -{ - struct irq_cfg *cfg = &irq_cfg[irq]; - cpumask_t cleanup_mask; - int i; - - if (likely(!cfg->move_in_progress)) - return; - - if (unlikely(cpumask_test_cpu(smp_processor_id(), &cfg->old_domain))) - return; - - cpumask_and(&cleanup_mask, &cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = 
cpumask_weight(&cleanup_mask); - for_each_cpu(i, &cleanup_mask) - ia64_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0); - cfg->move_in_progress = 0; -} - -static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) -{ - int me = smp_processor_id(); - ia64_vector vector; - unsigned long flags; - - for (vector = IA64_FIRST_DEVICE_VECTOR; - vector < IA64_LAST_DEVICE_VECTOR; vector++) { - int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __this_cpu_read(vector_irq[vector]); - if (irq < 0) - continue; - - desc = irq_to_desc(irq); - cfg = irq_cfg + irq; - raw_spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; - - if (!cpumask_test_cpu(me, &cfg->old_domain)) - goto unlock; - - spin_lock_irqsave(&vector_lock, flags); - __this_cpu_write(vector_irq[vector], -1); - cpumask_clear_cpu(me, &vector_table[vector]); - spin_unlock_irqrestore(&vector_lock, flags); - cfg->move_cleanup_count--; - unlock: - raw_spin_unlock(&desc->lock); - } - return IRQ_HANDLED; -} - -static int __init parse_vector_domain(char *arg) -{ - if (!arg) - return -EINVAL; - if (!strcmp(arg, "percpu")) { - vector_domain_type = VECTOR_DOMAIN_PERCPU; - no_int_routing = 1; - } - return 0; -} -early_param("vector", parse_vector_domain); -#else -static cpumask_t vector_allocation_domain(int cpu) -{ - return CPU_MASK_ALL; -} -#endif - - -void destroy_and_reserve_irq(unsigned int irq) -{ - unsigned long flags; - - irq_init_desc(irq); - spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); - irq_status[irq] = IRQ_RSVD; - spin_unlock_irqrestore(&vector_lock, flags); -} - -/* - * Dynamic irq allocate and deallocation for MSI - */ -int create_irq(void) -{ - unsigned long flags; - int irq, vector, cpu; - cpumask_t domain = CPU_MASK_NONE; - - irq = vector = -ENOSPC; - spin_lock_irqsave(&vector_lock, flags); - for_each_online_cpu(cpu) { - domain = vector_allocation_domain(cpu); - vector = find_unassigned_vector(domain); - if (vector >= 0) - break; - } - if 
(vector < 0) - goto out; - irq = find_unassigned_irq(); - if (irq < 0) - goto out; - BUG_ON(__bind_irq_vector(irq, vector, domain)); - out: - spin_unlock_irqrestore(&vector_lock, flags); - if (irq >= 0) - irq_init_desc(irq); - return irq; -} - -void destroy_irq(unsigned int irq) -{ - irq_init_desc(irq); - clear_irq_vector(irq); -} - -#ifdef CONFIG_SMP -# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) -# define IS_LOCAL_TLB_FLUSH(vec) (vec == IA64_IPI_LOCAL_TLB_FLUSH) -#else -# define IS_RESCHEDULE(vec) (0) -# define IS_LOCAL_TLB_FLUSH(vec) (0) -#endif -/* - * That's where the IVT branches when we get an external - * interrupt. This branches to the correct hardware IRQ handler via - * function ptr. - */ -void -ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - unsigned long saved_tpr; - -#if IRQ_DEBUG - { - unsigned long bsp, sp; - - /* - * Note: if the interrupt happened while executing in - * the context switch routine (ia64_switch_to), we may - * get a spurious stack overflow here. This is - * because the register and the memory stack are not - * switched atomically. - */ - bsp = ia64_getreg(_IA64_REG_AR_BSP); - sp = ia64_getreg(_IA64_REG_SP); - - if ((sp - bsp) < 1024) { - static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); - - if (__ratelimit(&ratelimit)) { - printk("ia64_handle_irq: DANGER: less than " - "1KB of free stack space!!\n" - "(bsp=0x%lx, sp=%lx)\n", bsp, sp); - } - } - } -#endif /* IRQ_DEBUG */ - - /* - * Always set TPR to limit maximum interrupt nesting depth to - * 16 (without this, it would be ~240, which could easily lead - * to kernel stack overflows). 
- */ - irq_enter(); - saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); - ia64_srlz_d(); - while (vector != IA64_SPURIOUS_INT_VECTOR) { - int irq = local_vector_to_irq(vector); - - if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { - smp_local_flush_tlb(); - kstat_incr_irq_this_cpu(irq); - } else if (unlikely(IS_RESCHEDULE(vector))) { - scheduler_ipi(); - kstat_incr_irq_this_cpu(irq); - } else { - ia64_setreg(_IA64_REG_CR_TPR, vector); - ia64_srlz_d(); - - if (unlikely(irq < 0)) { - printk(KERN_ERR "%s: Unexpected interrupt " - "vector %d on CPU %d is not mapped " - "to any IRQ!\n", __func__, vector, - smp_processor_id()); - } else - generic_handle_irq(irq); - - /* - * Disable interrupts and send EOI: - */ - local_irq_disable(); - ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); - } - ia64_eoi(); - vector = ia64_get_ivr(); - } - /* - * This must be done *after* the ia64_eoi(). For example, the keyboard softirq - * handler needs to be able to wait for further keyboard interrupts, which can't - * come through until ia64_eoi() has been done. - */ - irq_exit(); - set_irq_regs(old_regs); -} - -#ifdef CONFIG_HOTPLUG_CPU -/* - * This function emulates a interrupt processing when a cpu is about to be - * brought down. 
- */ -void ia64_process_pending_intr(void) -{ - ia64_vector vector; - unsigned long saved_tpr; - extern unsigned int vectors_in_migration[NR_IRQS]; - - vector = ia64_get_ivr(); - - irq_enter(); - saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); - ia64_srlz_d(); - - /* - * Perform normal interrupt style processing - */ - while (vector != IA64_SPURIOUS_INT_VECTOR) { - int irq = local_vector_to_irq(vector); - - if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { - smp_local_flush_tlb(); - kstat_incr_irq_this_cpu(irq); - } else if (unlikely(IS_RESCHEDULE(vector))) { - kstat_incr_irq_this_cpu(irq); - } else { - struct pt_regs *old_regs = set_irq_regs(NULL); - - ia64_setreg(_IA64_REG_CR_TPR, vector); - ia64_srlz_d(); - - /* - * Now try calling normal ia64_handle_irq as it would have got called - * from a real intr handler. Try passing null for pt_regs, hopefully - * it will work. I hope it works!. - * Probably could shared code. - */ - if (unlikely(irq < 0)) { - printk(KERN_ERR "%s: Unexpected interrupt " - "vector %d on CPU %d not being mapped " - "to any IRQ!!\n", __func__, vector, - smp_processor_id()); - } else { - vectors_in_migration[irq]=0; - generic_handle_irq(irq); - } - set_irq_regs(old_regs); - - /* - * Disable interrupts and send EOI - */ - local_irq_disable(); - ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); - } - ia64_eoi(); - vector = ia64_get_ivr(); - } - irq_exit(); -} -#endif - - -#ifdef CONFIG_SMP - -static irqreturn_t dummy_handler (int irq, void *dev_id) -{ - BUG(); - return IRQ_NONE; -} - -/* - * KVM uses this interrupt to force a cpu out of guest mode - */ - -#endif - -void -register_percpu_irq(ia64_vector vec, irq_handler_t handler, unsigned long flags, - const char *name) -{ - unsigned int irq; - - irq = vec; - BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL)); - irq_set_status_flags(irq, IRQ_PER_CPU); - irq_set_chip(irq, &irq_type_ia64_lsapic); - if (handler) - if (request_irq(irq, handler, flags, name, NULL)) - pr_err("Failed to request irq %u (%s)\n", irq, name); 
- irq_set_handler(irq, handle_percpu_irq); -} - -void __init -ia64_native_register_ipi(void) -{ -#ifdef CONFIG_SMP - register_percpu_irq(IA64_IPI_VECTOR, handle_IPI, 0, "IPI"); - register_percpu_irq(IA64_IPI_RESCHEDULE, dummy_handler, 0, "resched"); - register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, dummy_handler, 0, - "tlb_flush"); -#endif -} - -void __init -init_IRQ (void) -{ - acpi_boot_init(); - ia64_register_ipi(); - register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL, 0, NULL); -#ifdef CONFIG_SMP - if (vector_domain_type != VECTOR_DOMAIN_NONE) { - register_percpu_irq(IA64_IRQ_MOVE_VECTOR, - smp_irq_move_cleanup_interrupt, 0, - "irq_move"); - } -#endif -} - -void -ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect) -{ - void __iomem *ipi_addr; - unsigned long ipi_data; - unsigned long phys_cpu_id; - - phys_cpu_id = cpu_physical_id(cpu); - - /* - * cpu number is in 8bit ID and 8bit EID - */ - - ipi_data = (delivery_mode << 8) | (vector & 0xff); - ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3)); - - writeq(ipi_data, ipi_addr); -} diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c deleted file mode 100644 index 23bf4499a75d..000000000000 --- a/arch/ia64/kernel/irq_lsapic.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * LSAPIC Interrupt Controller - * - * This takes care of interrupts that are generated by the CPU's - * internal Streamlined Advanced Programmable Interrupt Controller - * (LSAPIC), such as the ITC and IPI interrupts. - * - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 2000 Hewlett-Packard Co - * Copyright (C) 2000 David Mosberger-Tang - */ - -#include -#include - -static unsigned int -lsapic_noop_startup (struct irq_data *data) -{ - return 0; -} - -static void -lsapic_noop (struct irq_data *data) -{ - /* nothing to do... 
*/ -} - -static int lsapic_retrigger(struct irq_data *data) -{ - ia64_resend_irq(data->irq); - - return 1; -} - -struct irq_chip irq_type_ia64_lsapic = { - .name = "LSAPIC", - .irq_startup = lsapic_noop_startup, - .irq_shutdown = lsapic_noop, - .irq_enable = lsapic_noop, - .irq_disable = lsapic_noop, - .irq_ack = lsapic_noop, - .irq_retrigger = lsapic_retrigger, -}; diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S deleted file mode 100644 index da90c49df628..000000000000 --- a/arch/ia64/kernel/ivt.S +++ /dev/null @@ -1,1688 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/ia64/kernel/ivt.S - * - * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger - * Copyright (C) 2000, 2002-2003 Intel Co - * Asit Mallick - * Suresh Siddha - * Kenneth Chen - * Fenghua Yu - * - * 00/08/23 Asit Mallick TLB handling for SMP - * 00/12/20 David Mosberger-Tang DTLB/ITLB handler now uses virtual PT. - * - * Copyright (C) 2005 Hewlett-Packard Co - * Dan Magenheimer - * Xen paravirtualization - * Copyright (c) 2008 Isaku Yamahata - * VA Linux Systems Japan K.K. - * pv_ops. - * Yaozu (Eddie) Dong - */ -/* - * This file defines the interruption vector table used by the CPU. - * It does not include one entry per possible cause of interruption. - * - * The first 20 entries of the table contain 64 bundles each while the - * remaining 48 entries contain only 16 bundles each. - * - * The 64 bundles are used to allow inlining the whole handler for critical - * interruptions like TLB misses. - * - * For each entry, the comment is as follows: - * - * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) - * entry offset ----/ / / / / - * entry number ---------/ / / / - * size of the entry -------------/ / / - * vector name -------------------------------------/ / - * interruptions triggering this vector ----------------------/ - * - * The table is 32KB in size and must be aligned on 32KB boundary. 
- * (The CPU ignores the 15 lower bits of the address) - * - * Table is based upon EAS2.6 (Oct 1999) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if 0 -# define PSR_DEFAULT_BITS psr.ac -#else -# define PSR_DEFAULT_BITS 0 -#endif - -#if 0 - /* - * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't - * needed for something else before enabling this... - */ -# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 -#else -# define DBG_FAULT(i) -#endif - -#include "minstate.h" - -#define FAULT(n) \ - mov r31=pr; \ - mov r19=n;; /* prepare to save predicates */ \ - br.sptk.many dispatch_to_fault_handler - - .section .text..ivt,"ax" - - .align 32768 // align on 32KB boundary - .global ia64_ivt - EXPORT_SYMBOL(ia64_ivt) -ia64_ivt: -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) -ENTRY(vhpt_miss) - DBG_FAULT(0) - /* - * The VHPT vector is invoked when the TLB entry for the virtual page table - * is missing. This happens only as a result of a previous - * (the "original") TLB miss, which may either be caused by an instruction - * fetch or a data access (or non-access). - * - * What we do here is normal TLB miss handing for the _original_ miss, - * followed by inserting the TLB entry for the virtual page table page - * that the VHPT walker was attempting to access. The latter gets - * inserted as long as page table entry above pte level have valid - * mappings for the faulting address. The TLB entry for the original - * miss gets inserted only if the pte entry indicates that the page is - * present. 
- * - * do_page_fault gets invoked in the following cases: - * - the faulting virtual address uses unimplemented address bits - * - the faulting virtual address has no valid page table mapping - */ - MOV_FROM_IFA(r16) // get address that caused the TLB miss -#ifdef CONFIG_HUGETLB_PAGE - movl r18=PAGE_SHIFT - MOV_FROM_ITIR(r25) -#endif - ;; - RSM_PSR_DT // use physical addressing for data - mov r31=pr // save the predicate registers - mov r19=IA64_KR(PT_BASE) // get page table base address - shl r21=r16,3 // shift bit 60 into sign bit - shr.u r17=r16,61 // get the region number into r17 - ;; - shr.u r22=r21,3 -#ifdef CONFIG_HUGETLB_PAGE - extr.u r26=r25,2,6 - ;; - cmp.ne p8,p0=r18,r26 - sub r27=r26,r18 - ;; -(p8) dep r25=r18,r25,2,6 -(p8) shr r22=r22,r27 -#endif - ;; - cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit - ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#if CONFIG_PGTABLE_LEVELS == 4 - shr.u r28=r22,PUD_SHIFT // shift pud index into position -#else - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -#endif - ;; - ld8 r17=[r17] // get *pgd (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? -#if CONFIG_PGTABLE_LEVELS == 4 - dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr) - ;; - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -(p7) ld8 r29=[r28] // get *pud (may be 0) - ;; -(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL? 
- dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) -#else - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr) -#endif - ;; -(p7) ld8 r20=[r17] // get *pmd (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift pte index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL? - dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) - ;; -(p7) ld8 r18=[r21] // read *pte - MOV_FROM_ISR(r19) // cr.isr bit 32 tells us if this is an insn miss - ;; -(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? - MOV_FROM_IHA(r22) // get the VHPT address that caused the TLB miss - ;; // avoid RAW on p7 -(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? - dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address - ;; - ITC_I_AND_D(p10, p11, r18, r24) // insert the instruction TLB entry and - // insert the data TLB entry -(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) - MOV_TO_IFA(r22, r24) - -#ifdef CONFIG_HUGETLB_PAGE - MOV_TO_ITIR(p8, r25, r24) // change to default page-size for VHPT -#endif - - /* - * Now compute and insert the TLB entry for the virtual page table. We never - * execute in a page table page so there is no need to set the exception deferral - * bit. - */ - adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 - ;; - ITC_D(p7, r24, r25) - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - /* - * Re-check pagetable entry. If they changed, we may have received a ptc.g - * between reading the pagetable and the "itc". If so, flush the entry we - * inserted and retry. 
At this point, we have: - * - * r28 = equivalent of pud_offset(pgd, ifa) - * r17 = equivalent of pmd_offset(pud, ifa) - * r21 = equivalent of pte_offset(pmd, ifa) - * - * r29 = *pud - * r20 = *pmd - * r18 = *pte - */ - ld8 r25=[r21] // read *pte again - ld8 r26=[r17] // read *pmd again -#if CONFIG_PGTABLE_LEVELS == 4 - ld8 r19=[r28] // read *pud again -#endif - cmp.ne p6,p7=r0,r0 - ;; - cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change -#if CONFIG_PGTABLE_LEVELS == 4 - cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change -#endif - mov r27=PAGE_SHIFT<<2 - ;; -(p6) ptc.l r22,r27 // purge PTE page translation -(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change - ;; -(p6) ptc.l r16,r27 // purge translation -#endif - - mov pr=r31,-1 // restore predicate registers - RFI -END(vhpt_miss) - - .org ia64_ivt+0x400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0400 Entry 1 (size 64 bundles) ITLB (21) -ENTRY(itlb_miss) - DBG_FAULT(1) - /* - * The ITLB handler accesses the PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the PTE read and - * go on normally after that. - */ - MOV_FROM_IFA(r16) // get virtual address - mov r29=b0 // save b0 - mov r31=pr // save predicates -.itlb_fault: - MOV_FROM_IHA(r17) // get virtual address of PTE - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read *pte - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
-(p6) br.cond.spnt page_fault - ;; - ITC_I(p0, r18, r19) - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read *pte again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 - RFI -END(itlb_miss) - - .org ia64_ivt+0x0800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) -ENTRY(dtlb_miss) - DBG_FAULT(2) - /* - * The DTLB handler accesses the PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the PTE read and - * go on normally after that. - */ - MOV_FROM_IFA(r16) // get virtual address - mov r29=b0 // save b0 - mov r31=pr // save predicates -dtlb_fault: - MOV_FROM_IHA(r17) // get virtual address of PTE - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read *pte - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
-(p6) br.cond.spnt page_fault - ;; - ITC_D(p0, r18, r19) - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read *pte again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 - RFI -END(dtlb_miss) - - .org ia64_ivt+0x0c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) -ENTRY(alt_itlb_miss) - DBG_FAULT(3) - MOV_FROM_IFA(r16) // get address that caused the TLB miss - movl r17=PAGE_KERNEL - MOV_FROM_IPSR(p0, r21) - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r31=pr - ;; -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // user mode - ;; - THASH(p8, r17, r16, r23) - ;; - MOV_TO_IHA(p8, r17, r23) -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk .itlb_fault -#endif - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - shr.u r18=r16,57 // move address bit 61 to bit 4 - ;; - andcm r18=0x10,r18 // bit 4=~address-bit(61) - cmp.ne p8,p0=r0,r23 // psr.cpl != 0? 
- or r19=r17,r19 // insert PTE control bits into r19 - ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 -(p8) br.cond.spnt page_fault - ;; - ITC_I(p0, r19, r18) // insert the TLB entry - mov pr=r31,-1 - RFI -END(alt_itlb_miss) - - .org ia64_ivt+0x1000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) -ENTRY(alt_dtlb_miss) - DBG_FAULT(4) - MOV_FROM_IFA(r16) // get address that caused the TLB miss - movl r17=PAGE_KERNEL - MOV_FROM_ISR(r20) - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - MOV_FROM_IPSR(p0, r21) - mov r31=pr - mov r24=PERCPU_ADDR - ;; -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // access to region 0-5 - ;; - THASH(p8, r17, r16, r25) - ;; - MOV_TO_IHA(p8, r17, r25) -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk dtlb_fault -#endif - cmp.ge p10,p11=r16,r24 // access to per_cpu_data? - tbit.z p12,p0=r16,61 // access to region 6? - mov r25=PERCPU_PAGE_SHIFT << 2 - mov r26=PERCPU_PAGE_SIZE - nop.m 0 - nop.b 0 - ;; -(p10) mov r19=IA64_KR(PER_CPU_DATA) -(p11) and r19=r19,r16 // clear non-ppn fields - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field - tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? - tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? 
- ;; -(p10) sub r19=r19,r26 - MOV_TO_ITIR(p10, r25, r24) - cmp.ne p8,p0=r0,r23 -(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field -(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr -(p8) br.cond.spnt page_fault - - dep r21=-1,r21,IA64_PSR_ED_BIT,1 - ;; - or r19=r19,r17 // insert PTE control bits into r19 - MOV_TO_IPSR(p6, r21, r24) - ;; - ITC_D(p7, r19, r18) // insert the TLB entry - mov pr=r31,-1 - RFI -END(alt_dtlb_miss) - - .org ia64_ivt+0x1400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) -ENTRY(nested_dtlb_miss) - /* - * In the absence of kernel bugs, we get here when the virtually mapped linear - * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction - * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page - * table is missing, a nested TLB miss fault is triggered and control is - * transferred to this point. When this happens, we lookup the pte for the - * faulting address by walking the page table in physical mode and return to the - * continuation point passed in register r30 (or call page_fault if the address is - * not mapped). - * - * Input: r16: faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Output: r17: physical address of PTE of faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) - */ - RSM_PSR_DT // switch to using physical data addressing - mov r19=IA64_KR(PT_BASE) // get the page table base address - shl r21=r16,3 // shift bit 60 into sign bit - MOV_FROM_ITIR(r18) - ;; - shr.u r17=r16,61 // get the region number into r17 - extr.u r18=r18,2,6 // get the faulting page size - ;; - cmp.eq p6,p7=5,r17 // is faulting address in region 5? 
- add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address - add r18=PGDIR_SHIFT-PAGE_SHIFT,r18 - ;; - shr.u r22=r16,r22 - shr.u r18=r16,r18 -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#if CONFIG_PGTABLE_LEVELS == 4 - shr.u r18=r22,PUD_SHIFT // shift pud index into position -#else - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -#endif - ;; - ld8 r17=[r17] // get *pgd (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr) - ;; -#if CONFIG_PGTABLE_LEVELS == 4 -(p7) ld8 r17=[r17] // get *pud (may be 0) - shr.u r18=r22,PMD_SHIFT // shift pmd index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) - ;; -#endif -(p7) ld8 r17=[r17] // get *pmd (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift pte index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL? 
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr); -(p6) br.cond.spnt page_fault - mov b0=r30 - br.sptk.many b0 // return to continuation point -END(nested_dtlb_miss) - - .org ia64_ivt+0x1800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) -ENTRY(ikey_miss) - DBG_FAULT(6) - FAULT(6) -END(ikey_miss) - - .org ia64_ivt+0x1c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) -ENTRY(dkey_miss) - DBG_FAULT(7) - FAULT(7) -END(dkey_miss) - - .org ia64_ivt+0x2000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) -ENTRY(dirty_bit) - DBG_FAULT(8) - /* - * What we do here is to simply turn on the dirty bit in the PTE. We need to - * update both the page-table and the TLB entry. To efficiently access the PTE, - * we address it through the virtual page table. Most likely, the TLB entry for - * the relevant virtual page table page is still present in the TLB so we can - * normally do this without additional TLB misses. In case the necessary virtual - * page table TLB entry isn't present, we take a nested TLB miss hit where we look - * up the physical address of the L3 PTE and then continue at label 1 below. 
- */ - MOV_FROM_IFA(r16) // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE - mov r29=b0 // save b0 in case of nested fault - mov r31=pr // save pr -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present - ;; - ITC_D(p6, r25, r18) // install updated PTE - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - ITC_D(p0, r18, r16) // install updated PTE -#endif - mov pr=r31,-1 // restore pr - RFI -END(dirty_bit) - - .org ia64_ivt+0x2400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) -ENTRY(iaccess_bit) - DBG_FAULT(9) - // Like Entry 8, except for instruction access - MOV_FROM_IFA(r16) // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - mov r31=pr // save predicates -#ifdef CONFIG_ITANIUM - /* - * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. 
- */ - MOV_FROM_IPSR(p0, r17) - ;; - MOV_FROM_IIP(r18) - tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? - ;; -(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa -#endif /* CONFIG_ITANIUM */ - ;; - THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the accessed bit - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only if page present - ;; - ITC_I(p6, r25, r26) // install updated PTE - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else /* !CONFIG_SMP */ - ;; -1: ld8 r18=[r17] - ;; - or r18=_PAGE_A,r18 // set the accessed bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - ITC_I(p0, r18, r16) // install updated PTE -#endif /* !CONFIG_SMP */ - mov pr=r31,-1 - RFI -END(iaccess_bit) - - .org ia64_ivt+0x2800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) -ENTRY(daccess_bit) - DBG_FAULT(10) - // Like Entry 8, except for data access - MOV_FROM_IFA(r16) // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE - mov r31=pr - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov 
ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the dirty bit - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only if page is present - ;; - ITC_D(p6, r25, r26) // install updated PTE - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - ;; - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_A,r18 // set the accessed bit - ;; - st8 [r17]=r18 // store back updated PTE - ITC_D(p0, r18, r16) // install updated PTE -#endif - mov b0=r29 // restore b0 - mov pr=r31,-1 - RFI -END(daccess_bit) - - .org ia64_ivt+0x2c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) -ENTRY(break_fault) - /* - * The streamlined system call entry/exit paths only save/restore the initial part - * of pt_regs. This implies that the callers of system-calls must adhere to the - * normal procedure calling conventions. - * - * Registers to be saved & restored: - * CR registers: cr.ipsr, cr.iip, cr.ifs - * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr - * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 - * Registers to be restored only: - * r8-r11: output value from the system call. - * - * During system call exit, scratch registers (including r15) are modified/cleared - * to prevent leaking bits from kernel to user level. 
- */ - DBG_FAULT(11) - mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) - MOV_FROM_IPSR(p0, r29) // M2 (12 cyc) - mov r31=pr // I0 (2 cyc) - - MOV_FROM_IIM(r17) // M2 (2 cyc) - mov.m r27=ar.rsc // M2 (12 cyc) - mov r18=__IA64_BREAK_SYSCALL // A - - mov.m ar.rsc=0 // M2 - mov.m r21=ar.fpsr // M2 (12 cyc) - mov r19=b6 // I0 (2 cyc) - ;; - mov.m r23=ar.bspstore // M2 (12 cyc) - mov.m r24=ar.rnat // M2 (5 cyc) - mov.i r26=ar.pfs // I0 (2 cyc) - - invala // M0|1 - nop.m 0 // M - mov r20=r1 // A save r1 - - nop.m 0 - movl r30=sys_call_table // X - - MOV_FROM_IIP(r28) // M2 (2 cyc) - cmp.eq p0,p7=r18,r17 // I0 is this a system call? -(p7) br.cond.spnt non_syscall // B no -> - // - // From this point on, we are definitely on the syscall-path - // and we can use (non-banked) scratch registers. - // -/////////////////////////////////////////////////////////////////////// - mov r1=r16 // A move task-pointer to "addl"-addressable reg - mov r2=r16 // A setup r2 for ia64_syscall_setup - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = ¤t_thread_info()->flags - - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 - adds r15=-1024,r15 // A subtract 1024 from syscall number - mov r3=NR_syscalls - 1 - ;; - ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag - ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags - extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr - - shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024) - addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS - cmp.leu p6,p7=r15,r3 // A syscall number in range? - ;; - - lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS -(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point - tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT? - - mov.m ar.bspstore=r22 // M2 switch to kernel RBS - cmp.eq p8,p9=2,r8 // A isr.ei==2? 
- ;; - -(p8) mov r8=0 // A clear ei to 0 -(p7) movl r30=sys_ni_syscall // X - -(p8) adds r28=16,r28 // A switch cr.iip to next bundle -(p9) adds r8=1,r8 // A increment ei to next slot -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - ;; - mov b6=r30 // I0 setup syscall handler branch reg early -#else - nop.i 0 - ;; -#endif - - mov.m r25=ar.unat // M2 (5 cyc) - dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr - adds r15=1024,r15 // A restore original syscall number - // - // If any of the above loads miss in L1D, we'll stall here until - // the data arrives. - // -/////////////////////////////////////////////////////////////////////// - st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting -#else - mov b6=r30 // I0 setup syscall handler branch reg early -#endif - cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? - - and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit - mov r18=ar.bsp // M2 (12 cyc) -(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS - ;; -.back_from_break_fixup: -(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack - cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? 
- br.call.sptk.many b7=ia64_syscall_setup // B -1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - // mov.m r30=ar.itc is called in advance, and r13 is current - add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A - add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A -(pKStk) br.cond.spnt .skip_accounting // B unlikely skip - ;; - ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp - ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave - ;; - ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime - ld8 r21=[r17] // M cumulated utime - sub r22=r19,r18 // A stime before leave - ;; - st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp - sub r18=r30,r19 // A elapsed time in user - ;; - add r20=r20,r22 // A sum stime - add r21=r21,r18 // A sum utime - ;; - st8 [r16]=r20 // M update stime - st8 [r17]=r21 // M update utime - ;; -.skip_accounting: -#endif - mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 - nop 0 - BSW_1(r2, r14) // B (6 cyc) regs are saved, switch to bank 1 - ;; - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16) // M2 now it's safe to re-enable intr.-collection - // M0 ensure interruption collection is on - movl r3=ia64_ret_from_syscall // X - ;; - mov rp=r3 // I0 set the real return addr -(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT - - SSM_PSR_I(p15, p15, r16) // M2 restore psr.i -(p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr) - br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic - // NOT REACHED -/////////////////////////////////////////////////////////////////////// - // On entry, we optimistically assumed that we're coming from user-space. 
- // For the rare cases where a system-call is done from within the kernel, - // we fix things up at this point: -.break_fixup: - add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure - mov ar.rnat=r24 // M2 restore kernel's AR.RNAT - ;; - mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE - br.cond.sptk .back_from_break_fixup -END(break_fault) - - .org ia64_ivt+0x3000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) -ENTRY(interrupt) - /* interrupt handler has become too big to fit this area. */ - br.sptk.many __interrupt -END(interrupt) - - .org ia64_ivt+0x3400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3400 Entry 13 (size 64 bundles) Reserved - DBG_FAULT(13) - FAULT(13) - - .org ia64_ivt+0x3800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3800 Entry 14 (size 64 bundles) Reserved - DBG_FAULT(14) - FAULT(14) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - * - * ia64_syscall_setup() is a separate subroutine so that it can - * allocate stacked registers so it can safely demine any - * potential NaT values from the input registers. 
- * - * On entry: - * - executing on bank 0 or bank 1 register set (doesn't matter) - * - r1: stack pointer - * - r2: current task pointer - * - r3: preserved - * - r11: original contents (saved ar.pfs to be saved) - * - r12: original contents (sp to be saved) - * - r13: original contents (tp to be saved) - * - r15: original contents (syscall # to be saved) - * - r18: saved bsp (after switching to kernel stack) - * - r19: saved b6 - * - r20: saved r1 (gp) - * - r21: saved ar.fpsr - * - r22: kernel's register backing store base (krbs_base) - * - r23: saved ar.bspstore - * - r24: saved ar.rnat - * - r25: saved ar.unat - * - r26: saved ar.pfs - * - r27: saved ar.rsc - * - r28: saved cr.iip - * - r29: saved cr.ipsr - * - r30: ar.itc for accounting (don't touch) - * - r31: saved pr - * - b0: original contents (to be saved) - * On exit: - * - p10: TRUE if syscall is invoked with more than 8 out - * registers or r15's Nat is true - * - r1: kernel's gp - * - r3: preserved (same as on entry) - * - r8: -EINVAL if p10 is true - * - r12: points to kernel stack - * - r13: points to current task - * - r14: preserved (same as on entry) - * - p13: preserved - * - p15: TRUE if interrupts need to be re-enabled - * - ar.fpsr: set to kernel settings - * - b6: preserved (same as on entry) - */ -GLOBAL_ENTRY(ia64_syscall_setup) -#if PT(B6) != 0 -# error This code assumes that b6 is the first field in pt_regs. 
-#endif - st8 [r1]=r19 // save b6 - add r16=PT(CR_IPSR),r1 // initialize first base pointer - add r17=PT(R11),r1 // initialize second base pointer - ;; - alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable - st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr - tnat.nz p8,p0=in0 - - st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 - tnat.nz p9,p0=in1 -(pKStk) mov r18=r0 // make sure r18 isn't NaT - ;; - - st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs - st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip - mov r28=b0 // save b0 (2 cyc) - ;; - - st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat - dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] -(p8) mov in0=-1 - ;; - - st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs - extr.u r11=r19,7,7 // I0 // get sol of ar.pfs - and r8=0x7f,r19 // A // get sof of ar.pfs - - st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc - tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 -(p9) mov in1=-1 - ;; - -(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 - tnat.nz p10,p0=in2 - add r11=8,r11 - ;; -(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field -(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field - tnat.nz p11,p0=in3 - ;; -(p10) mov in2=-1 - tnat.nz p12,p0=in4 // [I0] -(p11) mov in3=-1 - ;; -(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat -(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore - shl r18=r18,16 // compute ar.rsc to be used for "loadrs" - ;; - st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates - st8 [r17]=r28,PT(R1)-PT(B0) // save b0 - tnat.nz p13,p0=in5 // [I0] - ;; - st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" - st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 -(p12) mov in4=-1 - ;; - -.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 -.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 -(p13) mov in5=-1 - ;; - st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr - tnat.nz 
p13,p0=in6 - cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 - ;; - mov r8=1 -(p9) tnat.nz p10,p0=r15 - adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) - - st8.spill [r17]=r15 // save r15 - tnat.nz p8,p0=in7 - nop.i 0 - - mov r13=r2 // establish `current' - movl r1=__gp // establish kernel global pointer - ;; - st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) -(p13) mov in6=-1 -(p8) mov in7=-1 - - cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 - movl r17=FPSR_DEFAULT - ;; - mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value -(p10) mov r8=-EINVAL - br.ret.sptk.many b7 -END(ia64_syscall_setup) - - .org ia64_ivt+0x3c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3c00 Entry 15 (size 64 bundles) Reserved - DBG_FAULT(15) - FAULT(15) - - .org ia64_ivt+0x4000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - DBG_FAULT(16) - FAULT(16) - -#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) - /* - * There is no particular reason for this code to be here, other than - * that there happens to be space here that would go unused otherwise. - * If this fault ever gets "unreserved", simply moved the following - * code to a more suitable spot... - * - * account_sys_enter is called from SAVE_MIN* macros if accounting is - * enabled and if the macro is entered from user mode. 
- */ -GLOBAL_ENTRY(account_sys_enter) - // mov.m r20=ar.itc is called in advance, and r13 is current - add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 - add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 - ;; - ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel - ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel - ;; - ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime - ld8 r21=[r17] // cumulated utime - sub r22=r19,r18 // stime before leave kernel - ;; - st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp - sub r18=r20,r19 // elapsed time in user mode - ;; - add r23=r23,r22 // sum stime - add r21=r21,r18 // sum utime - ;; - st8 [r16]=r23 // update stime - st8 [r17]=r21 // update utime - ;; - br.ret.sptk.many rp -END(account_sys_enter) -#endif - - .org ia64_ivt+0x4400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4400 Entry 17 (size 64 bundles) Reserved - DBG_FAULT(17) - FAULT(17) - - .org ia64_ivt+0x4800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4800 Entry 18 (size 64 bundles) Reserved - DBG_FAULT(18) - FAULT(18) - - .org ia64_ivt+0x4c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4c00 Entry 19 (size 64 bundles) Reserved - DBG_FAULT(19) - FAULT(19) - -// -// --- End of long entries, Beginning of short entries -// - - .org ia64_ivt+0x5000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) -ENTRY(page_not_present) - DBG_FAULT(20) - MOV_FROM_IFA(r16) - RSM_PSR_DT - /* - * The Linux page fault handler doesn't expect non-present pages to be in - * the TLB. Flush the existing entry now, so we meet that expectation. 
- */ - mov r17=PAGE_SHIFT<<2 - ;; - ptc.l r16,r17 - ;; - mov r31=pr - srlz.d - br.sptk.many page_fault -END(page_not_present) - - .org ia64_ivt+0x5100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) -ENTRY(key_permission) - DBG_FAULT(21) - MOV_FROM_IFA(r16) - RSM_PSR_DT - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(key_permission) - - .org ia64_ivt+0x5200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) -ENTRY(iaccess_rights) - DBG_FAULT(22) - MOV_FROM_IFA(r16) - RSM_PSR_DT - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(iaccess_rights) - - .org ia64_ivt+0x5300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) -ENTRY(daccess_rights) - DBG_FAULT(23) - MOV_FROM_IFA(r16) - RSM_PSR_DT - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(daccess_rights) - - .org ia64_ivt+0x5400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) -ENTRY(general_exception) - DBG_FAULT(24) - MOV_FROM_ISR(r16) - mov r31=pr - ;; - cmp4.eq p6,p0=0,r16 -(p6) br.sptk.many dispatch_illegal_op_fault - ;; - mov r19=24 // fault number - br.sptk.many dispatch_to_fault_handler -END(general_exception) - - .org ia64_ivt+0x5500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) -ENTRY(disabled_fp_reg) - DBG_FAULT(25) - rsm psr.dfh // ensure we can access fph - ;; - srlz.d - mov r31=pr - mov r19=25 - br.sptk.many dispatch_to_fault_handler -END(disabled_fp_reg) - - .org ia64_ivt+0x5600 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) -ENTRY(nat_consumption) - DBG_FAULT(26) - - MOV_FROM_IPSR(p0, r16) - MOV_FROM_ISR(r17) - mov r31=pr // save PR - ;; - and r18=0xf,r17 // r18 = cr.ipsr.code{3:0} - tbit.z p6,p0=r17,IA64_ISR_NA_BIT - ;; - cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18 - dep r16=-1,r16,IA64_PSR_ED_BIT,1 -(p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH) - ;; - MOV_TO_IPSR(p0, r16, r18) - mov pr=r31,-1 - ;; - RFI - -1: mov pr=r31,-1 - ;; - FAULT(26) -END(nat_consumption) - - .org ia64_ivt+0x5700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5700 Entry 27 (size 16 bundles) Speculation (40) -ENTRY(speculation_vector) - DBG_FAULT(27) - /* - * A [f]chk.[as] instruction needs to take the branch to the recovery code but - * this part of the architecture is not implemented in hardware on some CPUs, such - * as Itanium. Thus, in general we need to emulate the behavior. IIM contains - * the relative target (not yet sign extended). So after sign extending it we - * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, - * i.e., the slot to restart into. 
- * - * cr.imm contains zero_ext(imm21) - */ - MOV_FROM_IIM(r18) - ;; - MOV_FROM_IIP(r17) - shl r18=r18,43 // put sign bit in position (43=64-21) - ;; - - MOV_FROM_IPSR(p0, r16) - shr r18=r18,39 // sign extend (39=43-4) - ;; - - add r17=r17,r18 // now add the offset - ;; - MOV_TO_IIP(r17, r19) - dep r16=0,r16,41,2 // clear EI - ;; - - MOV_TO_IPSR(p0, r16, r19) - ;; - - RFI -END(speculation_vector) - - .org ia64_ivt+0x5800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5800 Entry 28 (size 16 bundles) Reserved - DBG_FAULT(28) - FAULT(28) - - .org ia64_ivt+0x5900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) -ENTRY(debug_vector) - DBG_FAULT(29) - FAULT(29) -END(debug_vector) - - .org ia64_ivt+0x5a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) -ENTRY(unaligned_access) - DBG_FAULT(30) - mov r31=pr // prepare to save predicates - ;; - br.sptk.many dispatch_unaligned_handler -END(unaligned_access) - - .org ia64_ivt+0x5b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -ENTRY(unsupported_data_reference) - DBG_FAULT(31) - FAULT(31) -END(unsupported_data_reference) - - .org ia64_ivt+0x5c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) -ENTRY(floating_point_fault) - DBG_FAULT(32) - FAULT(32) -END(floating_point_fault) - - .org ia64_ivt+0x5d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) -ENTRY(floating_point_trap) - DBG_FAULT(33) - FAULT(33) -END(floating_point_trap) - - .org ia64_ivt+0x5e00 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -ENTRY(lower_privilege_trap) - DBG_FAULT(34) - FAULT(34) -END(lower_privilege_trap) - - .org ia64_ivt+0x5f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) -ENTRY(taken_branch_trap) - DBG_FAULT(35) - FAULT(35) -END(taken_branch_trap) - - .org ia64_ivt+0x6000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) -ENTRY(single_step_trap) - DBG_FAULT(36) - FAULT(36) -END(single_step_trap) - - .org ia64_ivt+0x6100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6100 Entry 37 (size 16 bundles) Reserved - DBG_FAULT(37) - FAULT(37) - - .org ia64_ivt+0x6200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6200 Entry 38 (size 16 bundles) Reserved - DBG_FAULT(38) - FAULT(38) - - .org ia64_ivt+0x6300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6300 Entry 39 (size 16 bundles) Reserved - DBG_FAULT(39) - FAULT(39) - - .org ia64_ivt+0x6400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6400 Entry 40 (size 16 bundles) Reserved - DBG_FAULT(40) - FAULT(40) - - .org ia64_ivt+0x6500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6500 Entry 41 (size 16 bundles) Reserved - DBG_FAULT(41) - FAULT(41) - - .org ia64_ivt+0x6600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6600 Entry 42 (size 16 bundles) Reserved - DBG_FAULT(42) - FAULT(42) - - .org ia64_ivt+0x6700 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x6700 Entry 43 (size 16 bundles) Reserved - DBG_FAULT(43) - FAULT(43) - - .org ia64_ivt+0x6800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6800 Entry 44 (size 16 bundles) Reserved - DBG_FAULT(44) - FAULT(44) - - .org ia64_ivt+0x6900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) -ENTRY(ia32_exception) - DBG_FAULT(45) - FAULT(45) -END(ia32_exception) - - .org ia64_ivt+0x6a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) -ENTRY(ia32_intercept) - DBG_FAULT(46) - FAULT(46) -END(ia32_intercept) - - .org ia64_ivt+0x6b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) -ENTRY(ia32_interrupt) - DBG_FAULT(47) - FAULT(47) -END(ia32_interrupt) - - .org ia64_ivt+0x6c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6c00 Entry 48 (size 16 bundles) Reserved - DBG_FAULT(48) - FAULT(48) - - .org ia64_ivt+0x6d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6d00 Entry 49 (size 16 bundles) Reserved - DBG_FAULT(49) - FAULT(49) - - .org ia64_ivt+0x6e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6e00 Entry 50 (size 16 bundles) Reserved - DBG_FAULT(50) - FAULT(50) - - .org ia64_ivt+0x6f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6f00 Entry 51 (size 16 bundles) Reserved - DBG_FAULT(51) - FAULT(51) - - .org ia64_ivt+0x7000 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x7000 Entry 52 (size 16 bundles) Reserved - DBG_FAULT(52) - FAULT(52) - - .org ia64_ivt+0x7100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7100 Entry 53 (size 16 bundles) Reserved - DBG_FAULT(53) - FAULT(53) - - .org ia64_ivt+0x7200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7200 Entry 54 (size 16 bundles) Reserved - DBG_FAULT(54) - FAULT(54) - - .org ia64_ivt+0x7300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7300 Entry 55 (size 16 bundles) Reserved - DBG_FAULT(55) - FAULT(55) - - .org ia64_ivt+0x7400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7400 Entry 56 (size 16 bundles) Reserved - DBG_FAULT(56) - FAULT(56) - - .org ia64_ivt+0x7500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7500 Entry 57 (size 16 bundles) Reserved - DBG_FAULT(57) - FAULT(57) - - .org ia64_ivt+0x7600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7600 Entry 58 (size 16 bundles) Reserved - DBG_FAULT(58) - FAULT(58) - - .org ia64_ivt+0x7700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7700 Entry 59 (size 16 bundles) Reserved - DBG_FAULT(59) - FAULT(59) - - .org ia64_ivt+0x7800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7800 Entry 60 (size 16 bundles) Reserved - DBG_FAULT(60) - FAULT(60) - - .org ia64_ivt+0x7900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7900 Entry 61 (size 16 bundles) Reserved - DBG_FAULT(61) - FAULT(61) - - .org ia64_ivt+0x7a00 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x7a00 Entry 62 (size 16 bundles) Reserved - DBG_FAULT(62) - FAULT(62) - - .org ia64_ivt+0x7b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7b00 Entry 63 (size 16 bundles) Reserved - DBG_FAULT(63) - FAULT(63) - - .org ia64_ivt+0x7c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7c00 Entry 64 (size 16 bundles) Reserved - DBG_FAULT(64) - FAULT(64) - - .org ia64_ivt+0x7d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7d00 Entry 65 (size 16 bundles) Reserved - DBG_FAULT(65) - FAULT(65) - - .org ia64_ivt+0x7e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7e00 Entry 66 (size 16 bundles) Reserved - DBG_FAULT(66) - FAULT(66) - - .org ia64_ivt+0x7f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7f00 Entry 67 (size 16 bundles) Reserved - DBG_FAULT(67) - FAULT(67) - - //----------------------------------------------------------------------------------- - // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) -ENTRY(page_fault) - SSM_PSR_DT_AND_SRLZ_I - ;; - SAVE_MIN_WITH_COVER - alloc r15=ar.pfs,0,0,3,0 - MOV_FROM_IFA(out0) - MOV_FROM_ISR(out1) - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) - adds r3=8,r2 // set up second base pointer - SSM_PSR_I(p15, p15, r14) // restore psr.i - movl r14=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 // out2 = pointer to pt_regs - br.call.sptk.many b6=ia64_do_page_fault // ignore return address -END(page_fault) - -ENTRY(non_syscall) - mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER - ;; - SAVE_MIN_WITH_COVER - - // There is no particular reason for this code to be here, other than that - // there happens to be space 
here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... - - alloc r14=ar.pfs,0,0,2,0 - MOV_FROM_IIM(out0) - add out1=16,sp - adds r3=8,r2 // set up second base pointer for SAVE_REST - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r15) // restore psr.i - movl r15=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r15 - ;; - br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr -END(non_syscall) - -ENTRY(__interrupt) - DBG_FAULT(12) - mov r31=pr // prepare to save predicates - ;; - SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) - // ensure everybody knows psr.ic is back on - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - ;; - MCA_RECOVER_RANGE(interrupt) - alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group - MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg - add out1=16,sp // pass pointer to pt_regs as second arg - ;; - srlz.d // make sure we see the effect of cr.ivr - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_handle_irq -END(__interrupt) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_unaligned_handler) - SAVE_MIN_WITH_COVER - ;; - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) 
- MOV_FROM_IFA(out0) - adds out1=16,sp - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_unaligned -END(dispatch_unaligned_handler) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_to_fault_handler) - /* - * Input: - * psr.ic: off - * r19: fault vector number (e.g., 24 for General Exception) - * r31: contains saved predicates (pr) - */ - SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - MOV_FROM_ISR(out1) - MOV_FROM_IFA(out2) - MOV_FROM_IIM(out3) - MOV_FROM_ITIR(out4) - ;; - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) - // guarantee that interruption collection is on - mov out0=r15 - ;; - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_fault -END(dispatch_to_fault_handler) - - /* - * Squatting in this space ... - * - * This special case dispatcher for illegal operation faults allows preserved - * registers to be modified through a callback function (asm only) that is handed - * back from the fault handler in r8. Up to three arguments can be passed to the - * callback function by returning an aggregate with the callback as its first - * element, followed by the arguments. 
- */ -ENTRY(dispatch_illegal_op_fault) - .prologue - .body - SAVE_MIN_WITH_COVER - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) - // guarantee that interruption collection is on - ;; - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in insn group - mov out0=ar.ec - ;; - SAVE_REST - PT_REGS_UNWIND_INFO(0) - ;; - br.call.sptk.many rp=ia64_illegal_op_fault -.ret0: ;; - alloc r14=ar.pfs,0,0,3,0 // must be first in insn group - mov out0=r9 - mov out1=r10 - mov out2=r11 - movl r15=ia64_leave_kernel - ;; - mov rp=r15 - mov b6=r8 - ;; - cmp.ne p6,p0=0,r8 -(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel - br.sptk.many ia64_leave_kernel -END(dispatch_illegal_op_fault) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c deleted file mode 100644 index ca34e51e84b4..000000000000 --- a/arch/ia64/kernel/kprobes.c +++ /dev/null @@ -1,911 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Kernel Probes (KProbes) - * arch/ia64/kernel/kprobes.c - * - * Copyright (C) IBM Corporation, 2002, 2004 - * Copyright (C) Intel Corporation, 2005 - * - * 2005-Apr Rusty Lynch and Anil S Keshavamurthy - * adapted from i386 - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; -DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); - -struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; - -enum instruction_type {A, I, M, F, B, L, X, u}; -static enum instruction_type bundle_encoding[32][3] = { - [0x00] = { M, I, I }, - [0x01] = { M, I, I }, - [0x02] = { M, I, I }, - [0x03] = { M, I, I }, - [0x04] = { M, L, X }, - [0x05] = { M, L, X }, - [0x06] = { u, u, u }, - [0x07] = { u, u, u }, - [0x08] = { M, M, I }, - [0x09] = { M, M, I }, - [0x0A] = { M, M, I }, - [0x0B] = { M, M, I }, - [0x0C] = { M, F, I }, - [0x0D] = { M, F, I }, - 
[0x0E] = { M, M, F }, - [0x0F] = { M, M, F }, - [0x10] = { M, I, B }, - [0x11] = { M, I, B }, - [0x12] = { M, B, B }, - [0x13] = { M, B, B }, - [0x14] = { u, u, u }, - [0x15] = { u, u, u }, - [0x16] = { B, B, B }, - [0x17] = { B, B, B }, - [0x18] = { M, M, B }, - [0x19] = { M, M, B }, - [0x1A] = { u, u, u }, - [0x1B] = { u, u, u }, - [0x1C] = { M, F, B }, - [0x1D] = { M, F, B }, - [0x1E] = { u, u, u }, - [0x1F] = { u, u, u }, -}; - -/* Insert a long branch code */ -static void __kprobes set_brl_inst(void *from, void *to) -{ - s64 rel = ((s64) to - (s64) from) >> 4; - bundle_t *brl; - brl = (bundle_t *) ((u64) from & ~0xf); - brl->quad0.template = 0x05; /* [MLX](stop) */ - brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */ - brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2; - brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46); - /* brl.cond.sptk.many.clr rel<<4 (qp=0) */ - brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff); -} - -/* - * In this function we check to see if the instruction - * is IP relative instruction and update the kprobe - * inst flag accordingly - */ -static void __kprobes update_kprobe_inst_flag(uint template, uint slot, - uint major_opcode, - unsigned long kprobe_inst, - struct kprobe *p) -{ - p->ainsn.inst_flag = 0; - p->ainsn.target_br_reg = 0; - p->ainsn.slot = slot; - - /* Check for Break instruction - * Bits 37:40 Major opcode to be zero - * Bits 27:32 X6 to be zero - * Bits 32:35 X3 to be zero - */ - if ((!major_opcode) && (!((kprobe_inst >> 27) & 0x1FF)) ) { - /* is a break instruction */ - p->ainsn.inst_flag |= INST_FLAG_BREAK_INST; - return; - } - - if (bundle_encoding[template][slot] == B) { - switch (major_opcode) { - case INDIRECT_CALL_OPCODE: - p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; - p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); - break; - case IP_RELATIVE_PREDICT_OPCODE: - case IP_RELATIVE_BRANCH_OPCODE: - p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; - break; - case 
IP_RELATIVE_CALL_OPCODE: - p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; - p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; - p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); - break; - } - } else if (bundle_encoding[template][slot] == X) { - switch (major_opcode) { - case LONG_CALL_OPCODE: - p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; - p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); - break; - } - } - return; -} - -/* - * In this function we check to see if the instruction - * (qp) cmpx.crel.ctype p1,p2=r2,r3 - * on which we are inserting kprobe is cmp instruction - * with ctype as unc. - */ -static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot, - uint major_opcode, - unsigned long kprobe_inst) -{ - cmp_inst_t cmp_inst; - uint ctype_unc = 0; - - if (!((bundle_encoding[template][slot] == I) || - (bundle_encoding[template][slot] == M))) - goto out; - - if (!((major_opcode == 0xC) || (major_opcode == 0xD) || - (major_opcode == 0xE))) - goto out; - - cmp_inst.l = kprobe_inst; - if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) { - /* Integer compare - Register Register (A6 type)*/ - if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0) - &&(cmp_inst.f.c == 1)) - ctype_unc = 1; - } else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) { - /* Integer compare - Immediate Register (A8 type)*/ - if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1)) - ctype_unc = 1; - } -out: - return ctype_unc; -} - -/* - * In this function we check to see if the instruction - * on which we are inserting kprobe is supported. 
- * Returns qp value if supported - * Returns -EINVAL if unsupported - */ -static int __kprobes unsupported_inst(uint template, uint slot, - uint major_opcode, - unsigned long kprobe_inst, - unsigned long addr) -{ - int qp; - - qp = kprobe_inst & 0x3f; - if (is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) { - if (slot == 1 && qp) { - printk(KERN_WARNING "Kprobes on cmp unc " - "instruction on slot 1 at <0x%lx> " - "is not supported\n", addr); - return -EINVAL; - - } - qp = 0; - } - else if (bundle_encoding[template][slot] == I) { - if (major_opcode == 0) { - /* - * Check for Integer speculation instruction - * - Bit 33-35 to be equal to 0x1 - */ - if (((kprobe_inst >> 33) & 0x7) == 1) { - printk(KERN_WARNING - "Kprobes on speculation inst at <0x%lx> not supported\n", - addr); - return -EINVAL; - } - /* - * IP relative mov instruction - * - Bit 27-35 to be equal to 0x30 - */ - if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { - printk(KERN_WARNING - "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", - addr); - return -EINVAL; - - } - } - else if ((major_opcode == 5) && !(kprobe_inst & (0xFUl << 33)) && - (kprobe_inst & (0x1UL << 12))) { - /* test bit instructions, tbit,tnat,tf - * bit 33-36 to be equal to 0 - * bit 12 to be equal to 1 - */ - if (slot == 1 && qp) { - printk(KERN_WARNING "Kprobes on test bit " - "instruction on slot at <0x%lx> " - "is not supported\n", addr); - return -EINVAL; - } - qp = 0; - } - } - else if (bundle_encoding[template][slot] == B) { - if (major_opcode == 7) { - /* IP-Relative Predict major code is 7 */ - printk(KERN_WARNING "Kprobes on IP-Relative" - "Predict is not supported\n"); - return -EINVAL; - } - else if (major_opcode == 2) { - /* Indirect Predict, major code is 2 - * bit 27-32 to be equal to 10 or 11 - */ - int x6=(kprobe_inst >> 27) & 0x3F; - if ((x6 == 0x10) || (x6 == 0x11)) { - printk(KERN_WARNING "Kprobes on " - "Indirect Predict is not supported\n"); - return -EINVAL; - } - } - } - /* kernel does not use 
float instruction, here for safety kprobe - * will judge whether it is fcmp/flass/float approximation instruction - */ - else if (unlikely(bundle_encoding[template][slot] == F)) { - if ((major_opcode == 4 || major_opcode == 5) && - (kprobe_inst & (0x1 << 12))) { - /* fcmp/fclass unc instruction */ - if (slot == 1 && qp) { - printk(KERN_WARNING "Kprobes on fcmp/fclass " - "instruction on slot at <0x%lx> " - "is not supported\n", addr); - return -EINVAL; - - } - qp = 0; - } - if ((major_opcode == 0 || major_opcode == 1) && - (kprobe_inst & (0x1UL << 33))) { - /* float Approximation instruction */ - if (slot == 1 && qp) { - printk(KERN_WARNING "Kprobes on float Approx " - "instr at <0x%lx> is not supported\n", - addr); - return -EINVAL; - } - qp = 0; - } - } - return qp; -} - -/* - * In this function we override the bundle with - * the break instruction at the given slot. - */ -static void __kprobes prepare_break_inst(uint template, uint slot, - uint major_opcode, - unsigned long kprobe_inst, - struct kprobe *p, - int qp) -{ - unsigned long break_inst = BREAK_INST; - bundle_t *bundle = &p->opcode.bundle; - - /* - * Copy the original kprobe_inst qualifying predicate(qp) - * to the break instruction - */ - break_inst |= qp; - - switch (slot) { - case 0: - bundle->quad0.slot0 = break_inst; - break; - case 1: - bundle->quad0.slot1_p0 = break_inst; - bundle->quad1.slot1_p1 = break_inst >> (64-46); - break; - case 2: - bundle->quad1.slot2 = break_inst; - break; - } - - /* - * Update the instruction flag, so that we can - * emulate the instruction properly after we - * single step on original instruction - */ - update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p); -} - -static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot, - unsigned long *kprobe_inst, uint *major_opcode) -{ - unsigned long kprobe_inst_p0, kprobe_inst_p1; - unsigned int template; - - template = bundle->quad0.template; - - switch (slot) { - case 0: - *major_opcode = 
(bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT); - *kprobe_inst = bundle->quad0.slot0; - break; - case 1: - *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT); - kprobe_inst_p0 = bundle->quad0.slot1_p0; - kprobe_inst_p1 = bundle->quad1.slot1_p1; - *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46)); - break; - case 2: - *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT); - *kprobe_inst = bundle->quad1.slot2; - break; - } -} - -/* Returns non-zero if the addr is in the Interrupt Vector Table */ -static int __kprobes in_ivt_functions(unsigned long addr) -{ - return (addr >= (unsigned long)__start_ivt_text - && addr < (unsigned long)__end_ivt_text); -} - -static int __kprobes valid_kprobe_addr(int template, int slot, - unsigned long addr) -{ - if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) { - printk(KERN_WARNING "Attempting to insert unaligned kprobe " - "at 0x%lx\n", addr); - return -EINVAL; - } - - if (in_ivt_functions(addr)) { - printk(KERN_WARNING "Kprobes can't be inserted inside " - "IVT functions at 0x%lx\n", addr); - return -EINVAL; - } - - return 0; -} - -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - unsigned int i; - i = atomic_add_return(1, &kcb->prev_kprobe_index); - kcb->prev_kprobe[i-1].kp = kprobe_running(); - kcb->prev_kprobe[i-1].status = kcb->kprobe_status; -} - -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - unsigned int i; - i = atomic_read(&kcb->prev_kprobe_index); - __this_cpu_write(current_kprobe, kcb->prev_kprobe[i-1].kp); - kcb->kprobe_status = kcb->prev_kprobe[i-1].status; - atomic_sub(1, &kcb->prev_kprobe_index); -} - -static void __kprobes set_current_kprobe(struct kprobe *p, - struct kprobe_ctlblk *kcb) -{ - __this_cpu_write(current_kprobe, p); -} - -void __kretprobe_trampoline(void) -{ -} - -int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) -{ - regs->cr_iip = __kretprobe_trampoline_handler(regs, 
NULL); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; -} - -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->b0; - ri->fp = NULL; - - /* Replace the return addr with trampoline addr */ - regs->b0 = (unsigned long)dereference_function_descriptor(__kretprobe_trampoline); -} - -/* Check the instruction in the slot is break */ -static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot) -{ - unsigned int major_opcode; - unsigned int template = bundle->quad0.template; - unsigned long kprobe_inst; - - /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ - if (slot == 1 && bundle_encoding[template][1] == L) - slot++; - - /* Get Kprobe probe instruction at given slot*/ - get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); - - /* For break instruction, - * Bits 37:40 Major opcode to be zero - * Bits 27:32 X6 to be zero - * Bits 32:35 X3 to be zero - */ - if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) { - /* Not a break instruction */ - return 0; - } - - /* Is a break instruction */ - return 1; -} - -/* - * In this function, we check whether the target bundle modifies IP or - * it triggers an exception. If so, it cannot be boostable. 
- */ -static int __kprobes can_boost(bundle_t *bundle, uint slot, - unsigned long bundle_addr) -{ - unsigned int template = bundle->quad0.template; - - do { - if (search_exception_tables(bundle_addr + slot) || - __is_ia64_break_inst(bundle, slot)) - return 0; /* exception may occur in this bundle*/ - } while ((++slot) < 3); - template &= 0x1e; - if (template >= 0x10 /* including B unit */ || - template == 0x04 /* including X unit */ || - template == 0x06) /* undefined */ - return 0; - - return 1; -} - -/* Prepare long jump bundle and disables other boosters if need */ -static void __kprobes prepare_booster(struct kprobe *p) -{ - unsigned long addr = (unsigned long)p->addr & ~0xFULL; - unsigned int slot = (unsigned long)p->addr & 0xf; - struct kprobe *other_kp; - - if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) { - set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1); - p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE; - } - - /* disables boosters in previous slots */ - for (; addr < (unsigned long)p->addr; addr++) { - other_kp = get_kprobe((void *)addr); - if (other_kp) - other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE; - } -} - -int __kprobes arch_prepare_kprobe(struct kprobe *p) -{ - unsigned long addr = (unsigned long) p->addr; - unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); - unsigned long kprobe_inst=0; - unsigned int slot = addr & 0xf, template, major_opcode = 0; - bundle_t *bundle; - int qp; - - bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle; - template = bundle->quad0.template; - - if(valid_kprobe_addr(template, slot, addr)) - return -EINVAL; - - /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ - if (slot == 1 && bundle_encoding[template][1] == L) - slot++; - - /* Get kprobe_inst and major_opcode from the bundle */ - get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); - - qp = unsupported_inst(template, slot, major_opcode, kprobe_inst, addr); - if (qp < 0) - return -EINVAL; - - p->ainsn.insn = 
get_insn_slot(); - if (!p->ainsn.insn) - return -ENOMEM; - memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t)); - memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t)); - - prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp); - - prepare_booster(p); - - return 0; -} - -void __kprobes arch_arm_kprobe(struct kprobe *p) -{ - unsigned long arm_addr; - bundle_t *src, *dest; - - arm_addr = ((unsigned long)p->addr) & ~0xFUL; - dest = &((kprobe_opcode_t *)arm_addr)->bundle; - src = &p->opcode.bundle; - - flush_icache_range((unsigned long)p->ainsn.insn, - (unsigned long)p->ainsn.insn + - sizeof(kprobe_opcode_t) * MAX_INSN_SIZE); - - switch (p->ainsn.slot) { - case 0: - dest->quad0.slot0 = src->quad0.slot0; - break; - case 1: - dest->quad1.slot1_p1 = src->quad1.slot1_p1; - break; - case 2: - dest->quad1.slot2 = src->quad1.slot2; - break; - } - flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); -} - -void __kprobes arch_disarm_kprobe(struct kprobe *p) -{ - unsigned long arm_addr; - bundle_t *src, *dest; - - arm_addr = ((unsigned long)p->addr) & ~0xFUL; - dest = &((kprobe_opcode_t *)arm_addr)->bundle; - /* p->ainsn.insn contains the original unaltered kprobe_opcode_t */ - src = &p->ainsn.insn->bundle; - switch (p->ainsn.slot) { - case 0: - dest->quad0.slot0 = src->quad0.slot0; - break; - case 1: - dest->quad1.slot1_p1 = src->quad1.slot1_p1; - break; - case 2: - dest->quad1.slot2 = src->quad1.slot2; - break; - } - flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); -} - -void __kprobes arch_remove_kprobe(struct kprobe *p) -{ - if (p->ainsn.insn) { - free_insn_slot(p->ainsn.insn, - p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); - p->ainsn.insn = NULL; - } -} -/* - * We are resuming execution after a single step fault, so the pt_regs - * structure reflects the register state after we executed the instruction - * located in the kprobe (p->ainsn.insn->bundle). 
We still need to adjust - * the ip to point back to the original stack address. To set the IP address - * to original stack address, handle the case where we need to fixup the - * relative IP address and/or fixup branch register. - */ -static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) -{ - unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle); - unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; - unsigned long template; - int slot = ((unsigned long)p->addr & 0xf); - - template = p->ainsn.insn->bundle.quad0.template; - - if (slot == 1 && bundle_encoding[template][1] == L) - slot = 2; - - if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) { - - if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { - /* Fix relative IP address */ - regs->cr_iip = (regs->cr_iip - bundle_addr) + - resume_addr; - } - - if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) { - /* - * Fix target branch register, software convention is - * to use either b0 or b6 or b7, so just checking - * only those registers - */ - switch (p->ainsn.target_br_reg) { - case 0: - if ((regs->b0 == bundle_addr) || - (regs->b0 == bundle_addr + 0x10)) { - regs->b0 = (regs->b0 - bundle_addr) + - resume_addr; - } - break; - case 6: - if ((regs->b6 == bundle_addr) || - (regs->b6 == bundle_addr + 0x10)) { - regs->b6 = (regs->b6 - bundle_addr) + - resume_addr; - } - break; - case 7: - if ((regs->b7 == bundle_addr) || - (regs->b7 == bundle_addr + 0x10)) { - regs->b7 = (regs->b7 - bundle_addr) + - resume_addr; - } - break; - } /* end switch */ - } - goto turn_ss_off; - } - - if (slot == 2) { - if (regs->cr_iip == bundle_addr + 0x10) { - regs->cr_iip = resume_addr + 0x10; - } - } else { - if (regs->cr_iip == bundle_addr) { - regs->cr_iip = resume_addr; - } - } - -turn_ss_off: - /* Turn off Single Step bit */ - ia64_psr(regs)->ss = 0; -} - -static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs) -{ - unsigned long bundle_addr = (unsigned long) 
&p->ainsn.insn->bundle; - unsigned long slot = (unsigned long)p->addr & 0xf; - - /* single step inline if break instruction */ - if (p->ainsn.inst_flag == INST_FLAG_BREAK_INST) - regs->cr_iip = (unsigned long)p->addr & ~0xFULL; - else - regs->cr_iip = bundle_addr & ~0xFULL; - - if (slot > 2) - slot = 0; - - ia64_psr(regs)->ri = slot; - - /* turn on single stepping */ - ia64_psr(regs)->ss = 1; -} - -static int __kprobes is_ia64_break_inst(struct pt_regs *regs) -{ - unsigned int slot = ia64_psr(regs)->ri; - unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip; - bundle_t bundle; - - memcpy(&bundle, kprobe_addr, sizeof(bundle_t)); - - return __is_ia64_break_inst(&bundle, slot); -} - -static int __kprobes pre_kprobes_handler(struct die_args *args) -{ - struct kprobe *p; - int ret = 0; - struct pt_regs *regs = args->regs; - kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); - struct kprobe_ctlblk *kcb; - - /* - * We don't want to be preempted for the entire - * duration of kprobe processing - */ - preempt_disable(); - kcb = get_kprobe_ctlblk(); - - /* Handle recursion cases */ - if (kprobe_running()) { - p = get_kprobe(addr); - if (p) { - if ((kcb->kprobe_status == KPROBE_HIT_SS) && - (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) { - ia64_psr(regs)->ss = 0; - goto no_kprobe; - } - /* We have reentered the pre_kprobe_handler(), since - * another probe was hit while within the handler. - * We here save the original kprobes variables and - * just single step on the instruction of the new probe - * without calling any user handlers. 
- */ - save_previous_kprobe(kcb); - set_current_kprobe(p, kcb); - kprobes_inc_nmissed_count(p); - prepare_ss(p, regs); - kcb->kprobe_status = KPROBE_REENTER; - return 1; - } else if (!is_ia64_break_inst(regs)) { - /* The breakpoint instruction was removed by - * another cpu right after we hit, no further - * handling of this interrupt is appropriate - */ - ret = 1; - goto no_kprobe; - } else { - /* Not our break */ - goto no_kprobe; - } - } - - p = get_kprobe(addr); - if (!p) { - if (!is_ia64_break_inst(regs)) { - /* - * The breakpoint instruction was removed right - * after we hit it. Another cpu has removed - * either a probepoint or a debugger breakpoint - * at this address. In either case, no further - * handling of this interrupt is appropriate. - */ - ret = 1; - - } - - /* Not one of our break, let kernel handle it */ - goto no_kprobe; - } - - set_current_kprobe(p, kcb); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - - if (p->pre_handler && p->pre_handler(p, regs)) { - reset_current_kprobe(); - preempt_enable_no_resched(); - return 1; - } - -#if !defined(CONFIG_PREEMPTION) - if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { - /* Boost up -- we can execute copied instructions directly */ - ia64_psr(regs)->ri = p->ainsn.slot; - regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL; - /* turn single stepping off */ - ia64_psr(regs)->ss = 0; - - reset_current_kprobe(); - preempt_enable_no_resched(); - return 1; - } -#endif - prepare_ss(p, regs); - kcb->kprobe_status = KPROBE_HIT_SS; - return 1; - -no_kprobe: - preempt_enable_no_resched(); - return ret; -} - -static int __kprobes post_kprobes_handler(struct pt_regs *regs) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - if (!cur) - return 0; - - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); - } - - resume_execution(cur, regs); - - /*Restore 
back the original saved kprobes variables and continue. */ - if (kcb->kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(kcb); - goto out; - } - reset_current_kprobe(); - -out: - preempt_enable_no_resched(); - return 1; -} - -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - - switch(kcb->kprobe_status) { - case KPROBE_HIT_SS: - case KPROBE_REENTER: - /* - * We are here because the instruction being single - * stepped caused a page fault. We reset the current - * kprobe and the instruction pointer points back to - * the probe address and allow the page fault handler - * to continue as a normal page fault. - */ - regs->cr_iip = ((unsigned long)cur->addr) & ~0xFULL; - ia64_psr(regs)->ri = ((unsigned long)cur->addr) & 0xf; - if (kcb->kprobe_status == KPROBE_REENTER) - restore_previous_kprobe(kcb); - else - reset_current_kprobe(); - preempt_enable_no_resched(); - break; - case KPROBE_HIT_ACTIVE: - case KPROBE_HIT_SSDONE: - /* - * In case the user-specified fault handler returned - * zero, try to fix up. - */ - if (ia64_done_with_exception(regs)) - return 1; - - /* - * Let ia64_do_page_fault() fix it. 
- */ - break; - default: - break; - } - - return 0; -} - -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - if (args->regs && user_mode(args->regs)) - return ret; - - switch(val) { - case DIE_BREAK: - /* err is break number from ia64_bad_break() */ - if ((args->err >> 12) == (__IA64_BREAK_KPROBE >> 12) - || args->err == 0) - if (pre_kprobes_handler(args)) - ret = NOTIFY_STOP; - break; - case DIE_FAULT: - /* err is vector number from ia64_fault() */ - if (args->err == 36) - if (post_kprobes_handler(args->regs)) - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - -static struct kprobe trampoline_p = { - .pre_handler = trampoline_probe_handler -}; - -int __init arch_init_kprobes(void) -{ - trampoline_p.addr = - dereference_function_descriptor(__kretprobe_trampoline); - return register_kprobe(&trampoline_p); -} - -int __kprobes arch_trampoline_kprobe(struct kprobe *p) -{ - if (p->addr == - dereference_function_descriptor(__kretprobe_trampoline)) - return 1; - - return 0; -} diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c deleted file mode 100644 index 4db9ca144fa5..000000000000 --- a/arch/ia64/kernel/machine_kexec.c +++ /dev/null @@ -1,163 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * arch/ia64/kernel/machine_kexec.c - * - * Handle transition of Linux booting another kernel - * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P. 
- * Copyright (C) 2005 Khalid Aziz - * Copyright (C) 2006 Intel Corp, Zou Nan hai - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef void (*relocate_new_kernel_t)( - unsigned long indirection_page, - unsigned long start_address, - struct ia64_boot_param *boot_param, - unsigned long pal_addr) __noreturn; - -struct kimage *ia64_kimage; - -struct resource efi_memmap_res = { - .name = "EFI Memory Map", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource boot_param_res = { - .name = "Boot parameter", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - - -/* - * Do what every setup is needed on image and the - * reboot code buffer to allow us to avoid allocations - * later. - */ -int machine_kexec_prepare(struct kimage *image) -{ - void *control_code_buffer; - const unsigned long *func; - - func = (unsigned long *)&relocate_new_kernel; - /* Pre-load control code buffer to minimize work in kexec path */ - control_code_buffer = page_address(image->control_code_page); - memcpy((void *)control_code_buffer, (const void *)func[0], - relocate_new_kernel_size); - flush_icache_range((unsigned long)control_code_buffer, - (unsigned long)control_code_buffer + relocate_new_kernel_size); - ia64_kimage = image; - - return 0; -} - -void machine_kexec_cleanup(struct kimage *image) -{ -} - -/* - * Do not allocate memory (or fail in any way) in machine_kexec(). - * We are past the point of no return, committed to rebooting now. 
- */ -static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) -{ - struct kimage *image = arg; - relocate_new_kernel_t rnk; - void *pal_addr = efi_get_pal_addr(); - unsigned long code_addr; - int ii; - u64 fp, gp; - ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump; - - BUG_ON(!image); - code_addr = (unsigned long)page_address(image->control_code_page); - if (image->type == KEXEC_TYPE_CRASH) { - crash_save_this_cpu(); - current->thread.ksp = (__u64)info->sw - 16; - - /* Register noop init handler */ - fp = ia64_tpa(init_handler->fp); - gp = ia64_tpa(ia64_getreg(_IA64_REG_GP)); - ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0); - } else { - /* Unregister init handlers of current kernel */ - ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0); - } - - /* Unregister mca handler - No more recovery on current kernel */ - ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0); - - /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); - - /* Mask CMC and Performance Monitor interrupts */ - ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); - ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); - - /* Mask ITV and Local Redirect Registers */ - ia64_set_itv(1 << 16); - ia64_set_lrr0(1 << 16); - ia64_set_lrr1(1 << 16); - - /* terminate possible nested in-service interrupts */ - for (ii = 0; ii < 16; ii++) - ia64_eoi(); - - /* unmask TPR and clear any pending interrupts */ - ia64_setreg(_IA64_REG_CR_TPR, 0); - ia64_srlz_d(); - while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR) - ia64_eoi(); - rnk = (relocate_new_kernel_t)&code_addr; - (*rnk)(image->head, image->start, ia64_boot_param, - GRANULEROUNDDOWN((unsigned long) pal_addr)); - BUG(); -} - -void machine_kexec(struct kimage *image) -{ - BUG_ON(!image); - unw_init_running(ia64_machine_kexec, image); - for(;;); -} - -void arch_crash_save_vmcoreinfo(void) -{ -#if defined(CONFIG_SPARSEMEM) - VMCOREINFO_SYMBOL(pgdat_list); - VMCOREINFO_LENGTH(pgdat_list, MAX_NUMNODES); 
-#endif -#ifdef CONFIG_NUMA - VMCOREINFO_SYMBOL(node_memblk); - VMCOREINFO_LENGTH(node_memblk, NR_NODE_MEMBLKS); - VMCOREINFO_STRUCT_SIZE(node_memblk_s); - VMCOREINFO_OFFSET(node_memblk_s, start_paddr); - VMCOREINFO_OFFSET(node_memblk_s, size); -#endif -#if CONFIG_PGTABLE_LEVELS == 3 - VMCOREINFO_CONFIG(PGTABLE_3); -#elif CONFIG_PGTABLE_LEVELS == 4 - VMCOREINFO_CONFIG(PGTABLE_4); -#endif -} - diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c deleted file mode 100644 index 2671688d349a..000000000000 --- a/arch/ia64/kernel/mca.c +++ /dev/null @@ -1,2111 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * File: mca.c - * Purpose: Generic MCA handling layer - * - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * Copyright (C) 2002 Dell Inc. - * Copyright (C) Matt Domsch - * - * Copyright (C) 2002 Intel - * Copyright (C) Jenna Hall - * - * Copyright (C) 2001 Intel - * Copyright (C) Fred Lewis - * - * Copyright (C) 2000 Intel - * Copyright (C) Chuck Fleckenstein - * - * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc. - * Copyright (C) Vijay Chander - * - * Copyright (C) 2006 FUJITSU LIMITED - * Copyright (C) Hidetoshi Seto - * - * 2000-03-29 Chuck Fleckenstein - * Fixed PAL/SAL update issues, began MCA bug fixes, logging issues, - * added min save state dump, added INIT handler. - * - * 2001-01-03 Fred Lewis - * Added setup of CMCI and CPEI IRQs, logging of corrected platform - * errors, completed code for logging of corrected & uncorrected - * machine check errors, and updated for conformance with Nov. 2000 - * revision of the SAL 3.0 spec. - * - * 2002-01-04 Jenna Hall - * Aligned MCA stack to 16 bytes, added platform vs. CPU error flag, - * set SAL default return values, changed error record structure to - * linked list, added init call to sal_get_state_info_size(). - * - * 2002-03-25 Matt Domsch - * GUID cleanups. - * - * 2003-04-15 David Mosberger-Tang - * Added INIT backtrace support. 
- * - * 2003-12-08 Keith Owens - * smp_call_function() must not be called from interrupt context - * (can deadlock on tasklist_lock). - * Use keventd to call smp_call_function(). - * - * 2004-02-01 Keith Owens - * Avoid deadlock when using printk() for MCA and INIT records. - * Delete all record printing code, moved to salinfo_decode in user - * space. Mark variables and functions static where possible. - * Delete dead variables and functions. Reorder to remove the need - * for forward declarations and to consolidate related code. - * - * 2005-08-12 Keith Owens - * Convert MCA/INIT handlers to use per event stacks and SAL/OS - * state. - * - * 2005-10-07 Keith Owens - * Add notify_die() hooks. - * - * 2006-09-15 Hidetoshi Seto - * Add printing support for MCA/INIT. - * - * 2007-04-27 Russ Anderson - * Support multiple cpus going through OS_MCA in the same event. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mca_drv.h" -#include "entry.h" -#include "irq.h" - -#if defined(IA64_MCA_DEBUG_INFO) -# define IA64_MCA_DEBUG(fmt...) printk(fmt) -#else -# define IA64_MCA_DEBUG(fmt...) 
do {} while (0) -#endif - -#define NOTIFY_INIT(event, regs, arg, spin) \ -do { \ - if ((notify_die((event), "INIT", (regs), (arg), 0, 0) \ - == NOTIFY_STOP) && ((spin) == 1)) \ - ia64_mca_spin(__func__); \ -} while (0) - -#define NOTIFY_MCA(event, regs, arg, spin) \ -do { \ - if ((notify_die((event), "MCA", (regs), (arg), 0, 0) \ - == NOTIFY_STOP) && ((spin) == 1)) \ - ia64_mca_spin(__func__); \ -} while (0) - -/* Used by mca_asm.S */ -DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ -DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ -DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ -DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ -DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */ - -unsigned long __per_cpu_mca[NR_CPUS]; - -/* In mca_asm.S */ -extern void ia64_os_init_dispatch_monarch (void); -extern void ia64_os_init_dispatch_slave (void); - -static int monarch_cpu = -1; - -static ia64_mc_info_t ia64_mc_info; - -#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */ -#define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */ -#define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */ -#define CPE_HISTORY_LENGTH 5 -#define CMC_HISTORY_LENGTH 5 - -static struct timer_list cpe_poll_timer; -static struct timer_list cmc_poll_timer; -/* - * This variable tells whether we are currently in polling mode. - * Start with this in the wrong state so we won't play w/ timers - * before the system is ready. - */ -static int cmc_polling_enabled = 1; - -/* - * Clearing this variable prevents CPE polling from getting activated - * in mca_late_init. Use it if your system doesn't provide a CPEI, - * but encounters problems retrieving CPE logs. This should only be - * necessary for debugging. 
- */ -static int cpe_poll_enabled = 1; - -extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); - -static int mca_init __initdata; - -/* - * limited & delayed printing support for MCA/INIT handler - */ - -#define mprintk(fmt...) ia64_mca_printk(fmt) - -#define MLOGBUF_SIZE (512+256*NR_CPUS) -#define MLOGBUF_MSGMAX 256 -static char mlogbuf[MLOGBUF_SIZE]; -static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ -static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ -static unsigned long mlogbuf_start; -static unsigned long mlogbuf_end; -static unsigned int mlogbuf_finished = 0; -static unsigned long mlogbuf_timestamp = 0; - -static int loglevel_save = -1; -#define BREAK_LOGLEVEL(__console_loglevel) \ - oops_in_progress = 1; \ - if (loglevel_save < 0) \ - loglevel_save = __console_loglevel; \ - __console_loglevel = 15; - -#define RESTORE_LOGLEVEL(__console_loglevel) \ - if (loglevel_save >= 0) { \ - __console_loglevel = loglevel_save; \ - loglevel_save = -1; \ - } \ - mlogbuf_finished = 0; \ - oops_in_progress = 0; - -/* - * Push messages into buffer, print them later if not urgent. - */ -void ia64_mca_printk(const char *fmt, ...) -{ - va_list args; - int printed_len; - char temp_buf[MLOGBUF_MSGMAX]; - char *p; - - va_start(args, fmt); - printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); - va_end(args); - - /* Copy the output into mlogbuf */ - if (oops_in_progress) { - /* mlogbuf was abandoned, use printk directly instead. */ - printk("%s", temp_buf); - } else { - spin_lock(&mlogbuf_wlock); - for (p = temp_buf; *p; p++) { - unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; - if (next != mlogbuf_start) { - mlogbuf[mlogbuf_end] = *p; - mlogbuf_end = next; - } else { - /* buffer full */ - break; - } - } - mlogbuf[mlogbuf_end] = '\0'; - spin_unlock(&mlogbuf_wlock); - } -} -EXPORT_SYMBOL(ia64_mca_printk); - -/* - * Print buffered messages. - * NOTE: call this after returning normal context. (ex. 
from salinfod) - */ -void ia64_mlogbuf_dump(void) -{ - char temp_buf[MLOGBUF_MSGMAX]; - char *p; - unsigned long index; - unsigned long flags; - unsigned int printed_len; - - /* Get output from mlogbuf */ - while (mlogbuf_start != mlogbuf_end) { - temp_buf[0] = '\0'; - p = temp_buf; - printed_len = 0; - - spin_lock_irqsave(&mlogbuf_rlock, flags); - - index = mlogbuf_start; - while (index != mlogbuf_end) { - *p = mlogbuf[index]; - index = (index + 1) % MLOGBUF_SIZE; - if (!*p) - break; - p++; - if (++printed_len >= MLOGBUF_MSGMAX - 1) - break; - } - *p = '\0'; - if (temp_buf[0]) - printk("%s", temp_buf); - mlogbuf_start = index; - - mlogbuf_timestamp = 0; - spin_unlock_irqrestore(&mlogbuf_rlock, flags); - } -} -EXPORT_SYMBOL(ia64_mlogbuf_dump); - -/* - * Call this if system is going to down or if immediate flushing messages to - * console is required. (ex. recovery was failed, crash dump is going to be - * invoked, long-wait rendezvous etc.) - * NOTE: this should be called from monarch. - */ -static void ia64_mlogbuf_finish(int wait) -{ - BREAK_LOGLEVEL(console_loglevel); - - ia64_mlogbuf_dump(); - printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " - "MCA/INIT might be dodgy or fail.\n"); - - if (!wait) - return; - - /* wait for console */ - printk("Delaying for 5 seconds...\n"); - udelay(5*1000000); - - mlogbuf_finished = 1; -} - -/* - * Print buffered messages from INIT context. - */ -static void ia64_mlogbuf_dump_from_init(void) -{ - if (mlogbuf_finished) - return; - - if (mlogbuf_timestamp && - time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) { - printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " - " and the system seems to be messed up.\n"); - ia64_mlogbuf_finish(0); - return; - } - - if (!spin_trylock(&mlogbuf_rlock)) { - printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. 
" - "Generated messages other than stack dump will be " - "buffered to mlogbuf and will be printed later.\n"); - printk(KERN_ERR "INIT: If messages would not printed after " - "this INIT, wait 30sec and assert INIT again.\n"); - if (!mlogbuf_timestamp) - mlogbuf_timestamp = jiffies; - return; - } - spin_unlock(&mlogbuf_rlock); - ia64_mlogbuf_dump(); -} - -static inline void -ia64_mca_spin(const char *func) -{ - if (monarch_cpu == smp_processor_id()) - ia64_mlogbuf_finish(0); - mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); - while (1) - cpu_relax(); -} -/* - * IA64_MCA log support - */ -#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ -#define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */ - -typedef struct ia64_state_log_s -{ - spinlock_t isl_lock; - int isl_index; - unsigned long isl_count; - ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ -} ia64_state_log_t; - -static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; - -#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) -#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) -#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) -#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index -#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index -#define IA64_LOG_INDEX_INC(it) \ - {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \ - ia64_state_log[it].isl_count++;} -#define IA64_LOG_INDEX_DEC(it) \ - ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index -#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])) -#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])) -#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count - -static inline void ia64_log_allocate(int it, u64 size) -{ - ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = - 
(ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES); - if (!ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]) - panic("%s: Failed to allocate %llu bytes\n", __func__, size); - - ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = - (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES); - if (!ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]) - panic("%s: Failed to allocate %llu bytes\n", __func__, size); -} - -/* - * ia64_log_init - * Reset the OS ia64 log buffer - * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) - * Outputs : None - */ -static void __init -ia64_log_init(int sal_info_type) -{ - u64 max_size = 0; - - IA64_LOG_NEXT_INDEX(sal_info_type) = 0; - IA64_LOG_LOCK_INIT(sal_info_type); - - // SAL will tell us the maximum size of any error record of this type - max_size = ia64_sal_get_state_info_size(sal_info_type); - if (!max_size) - /* alloc_bootmem() doesn't like zero-sized allocations! */ - return; - - // set up OS data structures to hold error info - ia64_log_allocate(sal_info_type, max_size); -} - -/* - * ia64_log_get - * - * Get the current MCA log from SAL and copy it into the OS log buffer. - * - * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) - * irq_safe whether you can use printk at this point - * Outputs : size (total record length) - * *buffer (ptr to error record) - * - */ -static u64 -ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) -{ - sal_log_record_header_t *log_buffer; - u64 total_len = 0; - unsigned long s; - - IA64_LOG_LOCK(sal_info_type); - - /* Get the process state information */ - log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type); - - total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer); - - if (total_len) { - IA64_LOG_INDEX_INC(sal_info_type); - IA64_LOG_UNLOCK(sal_info_type); - if (irq_safe) { - IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. 
Record length = %ld\n", - __func__, sal_info_type, total_len); - } - *buffer = (u8 *) log_buffer; - return total_len; - } else { - IA64_LOG_UNLOCK(sal_info_type); - return 0; - } -} - -/* - * ia64_mca_log_sal_error_record - * - * This function retrieves a specified error record type from SAL - * and wakes up any processes waiting for error records. - * - * Inputs : sal_info_type (Type of error record MCA/CMC/CPE) - * FIXME: remove MCA and irq_safe. - */ -static void -ia64_mca_log_sal_error_record(int sal_info_type) -{ - u8 *buffer; - sal_log_record_header_t *rh; - u64 size; - int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA; -#ifdef IA64_MCA_DEBUG_INFO - static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" }; -#endif - - size = ia64_log_get(sal_info_type, &buffer, irq_safe); - if (!size) - return; - - salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe); - - if (irq_safe) - IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n", - smp_processor_id(), - sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN"); - - /* Clear logs from corrected errors in case there's no user-level logger */ - rh = (sal_log_record_header_t *)buffer; - if (rh->severity == sal_log_severity_corrected) - ia64_sal_clear_state_info(sal_info_type); -} - -/* - * search_mca_table - * See if the MCA surfaced in an instruction range - * that has been tagged as recoverable. 
- * - * Inputs - * first First address range to check - * last Last address range to check - * ip Instruction pointer, address we are looking for - * - * Return value: - * 1 on Success (in the table)/ 0 on Failure (not in the table) - */ -int -search_mca_table (const struct mca_table_entry *first, - const struct mca_table_entry *last, - unsigned long ip) -{ - const struct mca_table_entry *curr; - u64 curr_start, curr_end; - - curr = first; - while (curr <= last) { - curr_start = (u64) &curr->start_addr + curr->start_addr; - curr_end = (u64) &curr->end_addr + curr->end_addr; - - if ((ip >= curr_start) && (ip <= curr_end)) { - return 1; - } - curr++; - } - return 0; -} - -/* Given an address, look for it in the mca tables. */ -int mca_recover_range(unsigned long addr) -{ - extern struct mca_table_entry __start___mca_table[]; - extern struct mca_table_entry __stop___mca_table[]; - - return search_mca_table(__start___mca_table, __stop___mca_table-1, addr); -} -EXPORT_SYMBOL_GPL(mca_recover_range); - -int cpe_vector = -1; -int ia64_cpe_irq = -1; - -static irqreturn_t -ia64_mca_cpe_int_handler (int cpe_irq, void *arg) -{ - static unsigned long cpe_history[CPE_HISTORY_LENGTH]; - static int index; - static DEFINE_SPINLOCK(cpe_history_lock); - - IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", - __func__, cpe_irq, smp_processor_id()); - - /* SAL spec states this should run w/ interrupts enabled */ - local_irq_enable(); - - spin_lock(&cpe_history_lock); - if (!cpe_poll_enabled && cpe_vector >= 0) { - - int i, count = 1; /* we know 1 happened now */ - unsigned long now = jiffies; - - for (i = 0; i < CPE_HISTORY_LENGTH; i++) { - if (now - cpe_history[i] <= HZ) - count++; - } - - IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH); - if (count >= CPE_HISTORY_LENGTH) { - - cpe_poll_enabled = 1; - spin_unlock(&cpe_history_lock); - disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR)); - - /* - * Corrected errors will still be 
corrected, but - * make sure there's a log somewhere that indicates - * something is generating more than we can handle. - */ - printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n"); - - mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL); - - /* lock already released, get out now */ - goto out; - } else { - cpe_history[index++] = now; - if (index == CPE_HISTORY_LENGTH) - index = 0; - } - } - spin_unlock(&cpe_history_lock); -out: - /* Get the CPE error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); - - local_irq_disable(); - - return IRQ_HANDLED; -} - -/* - * ia64_mca_register_cpev - * - * Register the corrected platform error vector with SAL. - * - * Inputs - * cpev Corrected Platform Error Vector number - * - * Outputs - * None - */ -void -ia64_mca_register_cpev (int cpev) -{ - /* Register the CPE interrupt vector with SAL */ - struct ia64_sal_retval isrv; - - isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0); - if (isrv.status) { - printk(KERN_ERR "Failed to register Corrected Platform " - "Error interrupt vector with SAL (status %ld)\n", isrv.status); - return; - } - - IA64_MCA_DEBUG("%s: corrected platform error " - "vector %#x registered\n", __func__, cpev); -} - -/* - * ia64_mca_cmc_vector_setup - * - * Setup the corrected machine check vector register in the processor. - * (The interrupt is masked on boot. ia64_mca_late_init unmask this.) - * This function is invoked on a per-processor basis. 
- * - * Inputs - * None - * - * Outputs - * None - */ -void -ia64_mca_cmc_vector_setup (void) -{ - cmcv_reg_t cmcv; - - cmcv.cmcv_regval = 0; - cmcv.cmcv_mask = 1; /* Mask/disable interrupt at first */ - cmcv.cmcv_vector = IA64_CMC_VECTOR; - ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); - - IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x registered.\n", - __func__, smp_processor_id(), IA64_CMC_VECTOR); - - IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n", - __func__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV)); -} - -/* - * ia64_mca_cmc_vector_disable - * - * Mask the corrected machine check vector register in the processor. - * This function is invoked on a per-processor basis. - * - * Inputs - * dummy(unused) - * - * Outputs - * None - */ -static void -ia64_mca_cmc_vector_disable (void *dummy) -{ - cmcv_reg_t cmcv; - - cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); - - cmcv.cmcv_mask = 1; /* Mask/disable interrupt */ - ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); - - IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x disabled.\n", - __func__, smp_processor_id(), cmcv.cmcv_vector); -} - -/* - * ia64_mca_cmc_vector_enable - * - * Unmask the corrected machine check vector register in the processor. - * This function is invoked on a per-processor basis. - * - * Inputs - * dummy(unused) - * - * Outputs - * None - */ -static void -ia64_mca_cmc_vector_enable (void *dummy) -{ - cmcv_reg_t cmcv; - - cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); - - cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */ - ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); - - IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x enabled.\n", - __func__, smp_processor_id(), cmcv.cmcv_vector); -} - -/* - * ia64_mca_cmc_vector_disable_keventd - * - * Called via keventd (smp_call_function() is not safe in interrupt context) to - * disable the cmc interrupt vector. 
- */ -static void -ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused) -{ - on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0); -} - -/* - * ia64_mca_cmc_vector_enable_keventd - * - * Called via keventd (smp_call_function() is not safe in interrupt context) to - * enable the cmc interrupt vector. - */ -static void -ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused) -{ - on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0); -} - -/* - * ia64_mca_wakeup - * - * Send an inter-cpu interrupt to wake-up a particular cpu. - * - * Inputs : cpuid - * Outputs : None - */ -static void -ia64_mca_wakeup(int cpu) -{ - ia64_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0); -} - -/* - * ia64_mca_wakeup_all - * - * Wakeup all the slave cpus which have rendez'ed previously. - * - * Inputs : None - * Outputs : None - */ -static void -ia64_mca_wakeup_all(void) -{ - int cpu; - - /* Clear the Rendez checkin flag for all cpus */ - for_each_online_cpu(cpu) { - if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) - ia64_mca_wakeup(cpu); - } - -} - -/* - * ia64_mca_rendez_interrupt_handler - * - * This is handler used to put slave processors into spinloop - * while the monarch processor does the mca handling and later - * wake each slave up once the monarch is done. The state - * IA64_MCA_RENDEZ_CHECKIN_DONE indicates the cpu is rendez'ed - * in SAL. The state IA64_MCA_RENDEZ_CHECKIN_NOTDONE indicates - * the cpu has come out of OS rendezvous. 
- * - * Inputs : None - * Outputs : None - */ -static irqreturn_t -ia64_mca_rendez_int_handler(int rendez_irq, void *arg) -{ - unsigned long flags; - int cpu = smp_processor_id(); - struct ia64_mca_notify_die nd = - { .sos = NULL, .monarch_cpu = &monarch_cpu }; - - /* Mask all interrupts */ - local_irq_save(flags); - - NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1); - - ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; - /* Register with the SAL monarch that the slave has - * reached SAL - */ - ia64_sal_mc_rendez(); - - NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1); - - /* Wait for the monarch cpu to exit. */ - while (monarch_cpu != -1) - cpu_relax(); /* spin until monarch leaves */ - - NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1); - - ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; - /* Enable all interrupts */ - local_irq_restore(flags); - return IRQ_HANDLED; -} - -/* - * ia64_mca_wakeup_int_handler - * - * The interrupt handler for processing the inter-cpu interrupt to the - * slave cpu which was spinning in the rendez loop. - * Since this spinning is done by turning off the interrupts and - * polling on the wakeup-interrupt bit in the IRR, there is - * nothing useful to be done in the handler. 
- * - * Inputs : wakeup_irq (Wakeup-interrupt bit) - * arg (Interrupt handler specific argument) - * Outputs : None - * - */ -static irqreturn_t -ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg) -{ - return IRQ_HANDLED; -} - -/* Function pointer for extra MCA recovery */ -int (*ia64_mca_ucmc_extension) - (void*,struct ia64_sal_os_state*) - = NULL; - -int -ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *)) -{ - if (ia64_mca_ucmc_extension) - return 1; - - ia64_mca_ucmc_extension = fn; - return 0; -} - -void -ia64_unreg_MCA_extension(void) -{ - if (ia64_mca_ucmc_extension) - ia64_mca_ucmc_extension = NULL; -} - -EXPORT_SYMBOL(ia64_reg_MCA_extension); -EXPORT_SYMBOL(ia64_unreg_MCA_extension); - - -static inline void -copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat) -{ - u64 fslot, tslot, nat; - *tr = *fr; - fslot = ((unsigned long)fr >> 3) & 63; - tslot = ((unsigned long)tr >> 3) & 63; - *tnat &= ~(1UL << tslot); - nat = (fnat >> fslot) & 1; - *tnat |= (nat << tslot); -} - -/* Change the comm field on the MCA/INT task to include the pid that - * was interrupted, it makes for easier debugging. If that pid was 0 - * (swapper or nested MCA/INIT) then use the start of the previous comm - * field suffixed with its cpu. 
- */ - -static void -ia64_mca_modify_comm(const struct task_struct *previous_current) -{ - char *p, comm[sizeof(current->comm)]; - if (previous_current->pid) - snprintf(comm, sizeof(comm), "%s %d", - current->comm, previous_current->pid); - else { - int l; - if ((p = strchr(previous_current->comm, ' '))) - l = p - previous_current->comm; - else - l = strlen(previous_current->comm); - snprintf(comm, sizeof(comm), "%s %*s %d", - current->comm, l, previous_current->comm, - task_thread_info(previous_current)->cpu); - } - memcpy(current->comm, comm, sizeof(current->comm)); -} - -static void -finish_pt_regs(struct pt_regs *regs, struct ia64_sal_os_state *sos, - unsigned long *nat) -{ - const struct pal_min_state_area *ms = sos->pal_min_state; - const u64 *bank; - - /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use - * pmsa_{xip,xpsr,xfs} - */ - if (ia64_psr(regs)->ic) { - regs->cr_iip = ms->pmsa_iip; - regs->cr_ipsr = ms->pmsa_ipsr; - regs->cr_ifs = ms->pmsa_ifs; - } else { - regs->cr_iip = ms->pmsa_xip; - regs->cr_ipsr = ms->pmsa_xpsr; - regs->cr_ifs = ms->pmsa_xfs; - - sos->iip = ms->pmsa_iip; - sos->ipsr = ms->pmsa_ipsr; - sos->ifs = ms->pmsa_ifs; - } - regs->pr = ms->pmsa_pr; - regs->b0 = ms->pmsa_br0; - regs->ar_rsc = ms->pmsa_rsc; - copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, ®s->r1, nat); - copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, ®s->r2, nat); - copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, ®s->r3, nat); - copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, ®s->r8, nat); - copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, ®s->r9, nat); - copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, ®s->r10, nat); - copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, ®s->r11, nat); - copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, ®s->r12, nat); - copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, ®s->r13, nat); - copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, ®s->r14, nat); - copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, ®s->r15, nat); - if (ia64_psr(regs)->bn) - bank = 
ms->pmsa_bank1_gr; - else - bank = ms->pmsa_bank0_gr; - copy_reg(&bank[16-16], ms->pmsa_nat_bits, ®s->r16, nat); - copy_reg(&bank[17-16], ms->pmsa_nat_bits, ®s->r17, nat); - copy_reg(&bank[18-16], ms->pmsa_nat_bits, ®s->r18, nat); - copy_reg(&bank[19-16], ms->pmsa_nat_bits, ®s->r19, nat); - copy_reg(&bank[20-16], ms->pmsa_nat_bits, ®s->r20, nat); - copy_reg(&bank[21-16], ms->pmsa_nat_bits, ®s->r21, nat); - copy_reg(&bank[22-16], ms->pmsa_nat_bits, ®s->r22, nat); - copy_reg(&bank[23-16], ms->pmsa_nat_bits, ®s->r23, nat); - copy_reg(&bank[24-16], ms->pmsa_nat_bits, ®s->r24, nat); - copy_reg(&bank[25-16], ms->pmsa_nat_bits, ®s->r25, nat); - copy_reg(&bank[26-16], ms->pmsa_nat_bits, ®s->r26, nat); - copy_reg(&bank[27-16], ms->pmsa_nat_bits, ®s->r27, nat); - copy_reg(&bank[28-16], ms->pmsa_nat_bits, ®s->r28, nat); - copy_reg(&bank[29-16], ms->pmsa_nat_bits, ®s->r29, nat); - copy_reg(&bank[30-16], ms->pmsa_nat_bits, ®s->r30, nat); - copy_reg(&bank[31-16], ms->pmsa_nat_bits, ®s->r31, nat); -} - -/* On entry to this routine, we are running on the per cpu stack, see - * mca_asm.h. The original stack has not been touched by this event. Some of - * the original stack's registers will be in the RBS on this stack. This stack - * also contains a partial pt_regs and switch_stack, the rest of the data is in - * PAL minstate. - * - * The first thing to do is modify the original stack to look like a blocked - * task so we can run backtrace on the original task. Also mark the per cpu - * stack as current to ensure that we use the correct task state, it also means - * that we can do backtrace on the MCA/INIT handler code itself. 
- */ - -static struct task_struct * -ia64_mca_modify_original_stack(struct pt_regs *regs, - const struct switch_stack *sw, - struct ia64_sal_os_state *sos, - const char *type) -{ - char *p; - ia64_va va; - extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ - const struct pal_min_state_area *ms = sos->pal_min_state; - struct task_struct *previous_current; - struct pt_regs *old_regs; - struct switch_stack *old_sw; - unsigned size = sizeof(struct pt_regs) + - sizeof(struct switch_stack) + 16; - unsigned long *old_bspstore, *old_bsp; - unsigned long *new_bspstore, *new_bsp; - unsigned long old_unat, old_rnat, new_rnat, nat; - u64 slots, loadrs = regs->loadrs; - u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1]; - u64 ar_bspstore = regs->ar_bspstore; - u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16); - const char *msg; - int cpu = smp_processor_id(); - - previous_current = curr_task(cpu); - ia64_set_curr_task(cpu, current); - if ((p = strchr(current->comm, ' '))) - *p = '\0'; - - /* Best effort attempt to cope with MCA/INIT delivered while in - * physical mode. - */ - regs->cr_ipsr = ms->pmsa_ipsr; - if (ia64_psr(regs)->dt == 0) { - va.l = r12; - if (va.f.reg == 0) { - va.f.reg = 7; - r12 = va.l; - } - va.l = r13; - if (va.f.reg == 0) { - va.f.reg = 7; - r13 = va.l; - } - } - if (ia64_psr(regs)->rt == 0) { - va.l = ar_bspstore; - if (va.f.reg == 0) { - va.f.reg = 7; - ar_bspstore = va.l; - } - va.l = ar_bsp; - if (va.f.reg == 0) { - va.f.reg = 7; - ar_bsp = va.l; - } - } - - /* mca_asm.S ia64_old_stack() cannot assume that the dirty registers - * have been copied to the old stack, the old stack may fail the - * validation tests below. So ia64_old_stack() must restore the dirty - * registers from the new stack. The old and new bspstore probably - * have different alignments, so loadrs calculated on the old bsp - * cannot be used to restore from the new bsp. 
Calculate a suitable - * loadrs for the new stack and save it in the new pt_regs, where - * ia64_old_stack() can get it. - */ - old_bspstore = (unsigned long *)ar_bspstore; - old_bsp = (unsigned long *)ar_bsp; - slots = ia64_rse_num_regs(old_bspstore, old_bsp); - new_bspstore = (unsigned long *)((u64)current + IA64_RBS_OFFSET); - new_bsp = ia64_rse_skip_regs(new_bspstore, slots); - regs->loadrs = (new_bsp - new_bspstore) * 8 << 16; - - /* Verify the previous stack state before we change it */ - if (user_mode(regs)) { - msg = "occurred in user space"; - /* previous_current is guaranteed to be valid when the task was - * in user space, so ... - */ - ia64_mca_modify_comm(previous_current); - goto no_mod; - } - - if (r13 != sos->prev_IA64_KR_CURRENT) { - msg = "inconsistent previous current and r13"; - goto no_mod; - } - - if (!mca_recover_range(ms->pmsa_iip)) { - if ((r12 - r13) >= KERNEL_STACK_SIZE) { - msg = "inconsistent r12 and r13"; - goto no_mod; - } - if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) { - msg = "inconsistent ar.bspstore and r13"; - goto no_mod; - } - va.p = old_bspstore; - if (va.f.reg < 5) { - msg = "old_bspstore is in the wrong region"; - goto no_mod; - } - if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) { - msg = "inconsistent ar.bsp and r13"; - goto no_mod; - } - size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8; - if (ar_bspstore + size > r12) { - msg = "no room for blocked state"; - goto no_mod; - } - } - - ia64_mca_modify_comm(previous_current); - - /* Make the original task look blocked. First stack a struct pt_regs, - * describing the state at the time of interrupt. mca_asm.S built a - * partial pt_regs, copy it and fill in the blanks using minstate. - */ - p = (char *)r12 - sizeof(*regs); - old_regs = (struct pt_regs *)p; - memcpy(old_regs, regs, sizeof(*regs)); - old_regs->loadrs = loadrs; - old_unat = old_regs->ar_unat; - finish_pt_regs(old_regs, sos, &old_unat); - - /* Next stack a struct switch_stack. 
mca_asm.S built a partial - * switch_stack, copy it and fill in the blanks using pt_regs and - * minstate. - * - * In the synthesized switch_stack, b0 points to ia64_leave_kernel, - * ar.pfs is set to 0. - * - * unwind.c::unw_unwind() does special processing for interrupt frames. - * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate - * is clear then unw_unwind() does _not_ adjust bsp over pt_regs. Not - * that this is documented, of course. Set PRED_NON_SYSCALL in the - * switch_stack on the original stack so it will unwind correctly when - * unwind.c reads pt_regs. - * - * thread.ksp is updated to point to the synthesized switch_stack. - */ - p -= sizeof(struct switch_stack); - old_sw = (struct switch_stack *)p; - memcpy(old_sw, sw, sizeof(*sw)); - old_sw->caller_unat = old_unat; - old_sw->ar_fpsr = old_regs->ar_fpsr; - copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat); - copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat); - copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat); - copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat); - old_sw->b0 = (u64)ia64_leave_kernel; - old_sw->b1 = ms->pmsa_br1; - old_sw->ar_pfs = 0; - old_sw->ar_unat = old_unat; - old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL); - previous_current->thread.ksp = (u64)p - 16; - - /* Finally copy the original stack's registers back to its RBS. - * Registers from ar.bspstore through ar.bsp at the time of the event - * are in the current RBS, copy them back to the original stack. The - * copy must be done register by register because the original bspstore - * and the current one have different alignments, so the saved RNAT - * data occurs at different places. - * - * mca_asm does cover, so the old_bsp already includes all registers at - * the time of MCA/INIT. It also does flushrs, so all registers before - * this function have been written to backing store on the MCA/INIT - * stack. 
- */ - new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore)); - old_rnat = regs->ar_rnat; - while (slots--) { - if (ia64_rse_is_rnat_slot(new_bspstore)) { - new_rnat = ia64_get_rnat(new_bspstore++); - } - if (ia64_rse_is_rnat_slot(old_bspstore)) { - *old_bspstore++ = old_rnat; - old_rnat = 0; - } - nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL; - old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore)); - old_rnat |= (nat << ia64_rse_slot_num(old_bspstore)); - *old_bspstore++ = *new_bspstore++; - } - old_sw->ar_bspstore = (unsigned long)old_bspstore; - old_sw->ar_rnat = old_rnat; - - sos->prev_task = previous_current; - return previous_current; - -no_mod: - mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n", - smp_processor_id(), type, msg); - old_unat = regs->ar_unat; - finish_pt_regs(regs, sos, &old_unat); - return previous_current; -} - -/* The monarch/slave interaction is based on monarch_cpu and requires that all - * slaves have entered rendezvous before the monarch leaves. If any cpu has - * not entered rendezvous yet then wait a bit. The assumption is that any - * slave that has not rendezvoused after a reasonable time is never going to do - * so. In this context, slave includes cpus that respond to the MCA rendezvous - * interrupt, as well as cpus that receive the INIT slave event. - */ - -static void -ia64_wait_for_slaves(int monarch, const char *type) -{ - int c, i , wait; - - /* - * wait 5 seconds total for slaves (arbitrary) - */ - for (i = 0; i < 5000; i++) { - wait = 0; - for_each_online_cpu(c) { - if (c == monarch) - continue; - if (ia64_mc_info.imi_rendez_checkin[c] - == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { - udelay(1000); /* short wait */ - wait = 1; - break; - } - } - if (!wait) - goto all_in; - } - - /* - * Maybe slave(s) dead. Print buffered messages immediately. 
- */ - ia64_mlogbuf_finish(0); - mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); - for_each_online_cpu(c) { - if (c == monarch) - continue; - if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) - mprintk(" %d", c); - } - mprintk("\n"); - return; - -all_in: - mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); - return; -} - -/* mca_insert_tr - * - * Switch rid when TR reload and needed! - * iord: 1: itr, 2: itr; - * -*/ -static void mca_insert_tr(u64 iord) -{ - - int i; - u64 old_rr; - struct ia64_tr_entry *p; - unsigned long psr; - int cpu = smp_processor_id(); - - if (!ia64_idtrs[cpu]) - return; - - psr = ia64_clear_ic(); - for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { - p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX; - if (p->pte & 0x1) { - old_rr = ia64_get_rr(p->ifa); - if (old_rr != p->rr) { - ia64_set_rr(p->ifa, p->rr); - ia64_srlz_d(); - } - ia64_ptr(iord, p->ifa, p->itir >> 2); - ia64_srlz_i(); - if (iord & 0x1) { - ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2); - ia64_srlz_i(); - } - if (iord & 0x2) { - ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2); - ia64_srlz_i(); - } - if (old_rr != p->rr) { - ia64_set_rr(p->ifa, old_rr); - ia64_srlz_d(); - } - } - } - ia64_set_psr(psr); -} - -/* - * ia64_mca_handler - * - * This is uncorrectable machine check handler called from OS_MCA - * dispatch code which is in turn called from SAL_CHECK(). - * This is the place where the core of OS MCA handling is done. - * Right now the logs are extracted and displayed in a well-defined - * format. This handler code is supposed to be run only on the - * monarch processor. Once the monarch is done with MCA handling - * further MCA logging is enabled by clearing logs. - * Monarch also has the duty of sending wakeup-IPIs to pull the - * slave processors out of rendezvous spinloop. - * - * If multiple processors call into OS_MCA, the first will become - * the monarch. 
Subsequent cpus will be recorded in the mca_cpu - * bitmask. After the first monarch has processed its MCA, it - * will wake up the next cpu in the mca_cpu bitmask and then go - * into the rendezvous loop. When all processors have serviced - * their MCA, the last monarch frees up the rest of the processors. - */ -void -ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, - struct ia64_sal_os_state *sos) -{ - int recover, cpu = smp_processor_id(); - struct task_struct *previous_current; - struct ia64_mca_notify_die nd = - { .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover }; - static atomic_t mca_count; - static cpumask_t mca_cpu; - - if (atomic_add_return(1, &mca_count) == 1) { - monarch_cpu = cpu; - sos->monarch = 1; - } else { - cpumask_set_cpu(cpu, &mca_cpu); - sos->monarch = 0; - } - mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " - "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); - - previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); - - NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1); - - ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA; - if (sos->monarch) { - ia64_wait_for_slaves(cpu, "MCA"); - - /* Wakeup all the processors which are spinning in the - * rendezvous loop. They will leave SAL, then spin in the OS - * with interrupts disabled until this monarch cpu leaves the - * MCA handler. That gets control back to the OS so we can - * backtrace the other cpus, backtrace when spinning in SAL - * does not work. 
- */ - ia64_mca_wakeup_all(); - } else { - while (cpumask_test_cpu(cpu, &mca_cpu)) - cpu_relax(); /* spin until monarch wakes us */ - } - - NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1); - - /* Get the MCA error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); - - /* MCA error recovery */ - recover = (ia64_mca_ucmc_extension - && ia64_mca_ucmc_extension( - IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), - sos)); - - if (recover) { - sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA); - rh->severity = sal_log_severity_corrected; - ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); - sos->os_status = IA64_MCA_CORRECTED; - } else { - /* Dump buffered message to console */ - ia64_mlogbuf_finish(1); - } - - if (__this_cpu_read(ia64_mca_tr_reload)) { - mca_insert_tr(0x1); /*Reload dynamic itrs*/ - mca_insert_tr(0x2); /*Reload dynamic itrs*/ - } - - NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1); - - if (atomic_dec_return(&mca_count) > 0) { - int i; - - /* wake up the next monarch cpu, - * and put this cpu in the rendez loop. - */ - for_each_online_cpu(i) { - if (cpumask_test_cpu(i, &mca_cpu)) { - monarch_cpu = i; - cpumask_clear_cpu(i, &mca_cpu); /* wake next cpu */ - while (monarch_cpu != -1) - cpu_relax(); /* spin until last cpu leaves */ - ia64_set_curr_task(cpu, previous_current); - ia64_mc_info.imi_rendez_checkin[cpu] - = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; - return; - } - } - } - ia64_set_curr_task(cpu, previous_current); - ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; - monarch_cpu = -1; /* This frees the slaves and previous monarchs */ -} - -static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd); -static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd); - -/* - * ia64_mca_cmc_int_handler - * - * This is corrected machine check interrupt handler. - * Right now the logs are extracted and displayed in a well-defined - * format. 
- * - * Inputs - * interrupt number - * client data arg ptr - * - * Outputs - * None - */ -static irqreturn_t -ia64_mca_cmc_int_handler(int cmc_irq, void *arg) -{ - static unsigned long cmc_history[CMC_HISTORY_LENGTH]; - static int index; - static DEFINE_SPINLOCK(cmc_history_lock); - - IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", - __func__, cmc_irq, smp_processor_id()); - - /* SAL spec states this should run w/ interrupts enabled */ - local_irq_enable(); - - spin_lock(&cmc_history_lock); - if (!cmc_polling_enabled) { - int i, count = 1; /* we know 1 happened now */ - unsigned long now = jiffies; - - for (i = 0; i < CMC_HISTORY_LENGTH; i++) { - if (now - cmc_history[i] <= HZ) - count++; - } - - IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH); - if (count >= CMC_HISTORY_LENGTH) { - - cmc_polling_enabled = 1; - spin_unlock(&cmc_history_lock); - /* If we're being hit with CMC interrupts, we won't - * ever execute the schedule_work() below. Need to - * disable CMC interrupts on this processor now. - */ - ia64_mca_cmc_vector_disable(NULL); - schedule_work(&cmc_disable_work); - - /* - * Corrected errors will still be corrected, but - * make sure there's a log somewhere that indicates - * something is generating more than we can handle. - */ - printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n"); - - mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); - - /* lock already released, get out now */ - goto out; - } else { - cmc_history[index++] = now; - if (index == CMC_HISTORY_LENGTH) - index = 0; - } - } - spin_unlock(&cmc_history_lock); -out: - /* Get the CMC error record and log it */ - ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); - - local_irq_disable(); - - return IRQ_HANDLED; -} - -/* - * ia64_mca_cmc_int_caller - * - * Triggered by sw interrupt from CMC polling routine. 
Calls - * real interrupt handler and either triggers a sw interrupt - * on the next cpu or does cleanup at the end. - * - * Inputs - * interrupt number - * client data arg ptr - * Outputs - * handled - */ -static irqreturn_t -ia64_mca_cmc_int_caller(int cmc_irq, void *arg) -{ - static int start_count = -1; - unsigned int cpuid; - - cpuid = smp_processor_id(); - - /* If first cpu, update count */ - if (start_count == -1) - start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC); - - ia64_mca_cmc_int_handler(cmc_irq, arg); - - cpuid = cpumask_next(cpuid+1, cpu_online_mask); - - if (cpuid < nr_cpu_ids) { - ia64_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); - } else { - /* If no log record, switch out of polling mode */ - if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) { - - printk(KERN_WARNING "Returning to interrupt driven CMC handler\n"); - schedule_work(&cmc_enable_work); - cmc_polling_enabled = 0; - - } else { - - mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); - } - - start_count = -1; - } - - return IRQ_HANDLED; -} - -/* - * ia64_mca_cmc_poll - * - * Poll for Corrected Machine Checks (CMCs) - * - * Inputs : dummy(unused) - * Outputs : None - * - */ -static void -ia64_mca_cmc_poll (struct timer_list *unused) -{ - /* Trigger a CMC interrupt cascade */ - ia64_send_ipi(cpumask_first(cpu_online_mask), IA64_CMCP_VECTOR, - IA64_IPI_DM_INT, 0); -} - -/* - * ia64_mca_cpe_int_caller - * - * Triggered by sw interrupt from CPE polling routine. Calls - * real interrupt handler and either triggers a sw interrupt - * on the next cpu or does cleanup at the end. 
- * - * Inputs - * interrupt number - * client data arg ptr - * Outputs - * handled - */ -static irqreturn_t -ia64_mca_cpe_int_caller(int cpe_irq, void *arg) -{ - static int start_count = -1; - static int poll_time = MIN_CPE_POLL_INTERVAL; - unsigned int cpuid; - - cpuid = smp_processor_id(); - - /* If first cpu, update count */ - if (start_count == -1) - start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE); - - ia64_mca_cpe_int_handler(cpe_irq, arg); - - cpuid = cpumask_next(cpuid+1, cpu_online_mask); - - if (cpuid < NR_CPUS) { - ia64_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); - } else { - /* - * If a log was recorded, increase our polling frequency, - * otherwise, backoff or return to interrupt mode. - */ - if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) { - poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2); - } else if (cpe_vector < 0) { - poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2); - } else { - poll_time = MIN_CPE_POLL_INTERVAL; - - printk(KERN_WARNING "Returning to interrupt driven CPE handler\n"); - enable_irq(local_vector_to_irq(IA64_CPE_VECTOR)); - cpe_poll_enabled = 0; - } - - if (cpe_poll_enabled) - mod_timer(&cpe_poll_timer, jiffies + poll_time); - start_count = -1; - } - - return IRQ_HANDLED; -} - -/* - * ia64_mca_cpe_poll - * - * Poll for Corrected Platform Errors (CPEs), trigger interrupt - * on first cpu, from there it will trickle through all the cpus. 
- * - * Inputs : dummy(unused) - * Outputs : None - * - */ -static void -ia64_mca_cpe_poll (struct timer_list *unused) -{ - /* Trigger a CPE interrupt cascade */ - ia64_send_ipi(cpumask_first(cpu_online_mask), IA64_CPEP_VECTOR, - IA64_IPI_DM_INT, 0); -} - -static int -default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data) -{ - int c; - struct task_struct *g, *t; - if (val != DIE_INIT_MONARCH_PROCESS) - return NOTIFY_DONE; -#ifdef CONFIG_KEXEC - if (atomic_read(&kdump_in_progress)) - return NOTIFY_DONE; -#endif - - /* - * FIXME: mlogbuf will brim over with INIT stack dumps. - * To enable show_stack from INIT, we use oops_in_progress which should - * be used in real oops. This would cause something wrong after INIT. - */ - BREAK_LOGLEVEL(console_loglevel); - ia64_mlogbuf_dump_from_init(); - - printk(KERN_ERR "Processes interrupted by INIT -"); - for_each_online_cpu(c) { - struct ia64_sal_os_state *s; - t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); - s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); - g = s->prev_task; - if (g) { - if (g->pid) - printk(" %d", g->pid); - else - printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); - } - } - printk("\n\n"); - if (read_trylock(&tasklist_lock)) { - for_each_process_thread(g, t) { - printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); - show_stack(t, NULL, KERN_DEFAULT); - } - read_unlock(&tasklist_lock); - } - /* FIXME: This will not restore zapped printk locks. */ - RESTORE_LOGLEVEL(console_loglevel); - return NOTIFY_DONE; -} - -/* - * C portion of the OS INIT handler - * - * Called from ia64_os_init_dispatch - * - * Inputs: pointer to pt_regs where processor info was saved. SAL/OS state for - * this event. This code is used for both monarch and slave INIT events, see - * sos->monarch. - * - * All INIT events switch to the INIT stack and change the previous process to - * blocked status. 
If one of the INIT events is the monarch then we are - * probably processing the nmi button/command. Use the monarch cpu to dump all - * the processes. The slave INIT events all spin until the monarch cpu - * returns. We can also get INIT slave events for MCA, in which case the MCA - * process is the monarch. - */ - -void -ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, - struct ia64_sal_os_state *sos) -{ - static atomic_t slaves; - static atomic_t monarchs; - struct task_struct *previous_current; - int cpu = smp_processor_id(); - struct ia64_mca_notify_die nd = - { .sos = sos, .monarch_cpu = &monarch_cpu }; - - NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0); - - mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", - sos->proc_state_param, cpu, sos->monarch); - salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); - - previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT"); - sos->os_status = IA64_INIT_RESUME; - - /* FIXME: Workaround for broken proms that drive all INIT events as - * slaves. The last slave that enters is promoted to be a monarch. - * Remove this code in September 2006, that gives platforms a year to - * fix their proms and get their customers updated. - */ - if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { - mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", - __func__, cpu); - atomic_dec(&slaves); - sos->monarch = 1; - } - - /* FIXME: Workaround for broken proms that drive all INIT events as - * monarchs. Second and subsequent monarchs are demoted to slaves. - * Remove this code in September 2006, that gives platforms a year to - * fix their proms and get their customers updated. 
- */ - if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { - mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", - __func__, cpu); - atomic_dec(&monarchs); - sos->monarch = 0; - } - - if (!sos->monarch) { - ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; - -#ifdef CONFIG_KEXEC - while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress)) - udelay(1000); -#else - while (monarch_cpu == -1) - cpu_relax(); /* spin until monarch enters */ -#endif - - NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1); - NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1); - -#ifdef CONFIG_KEXEC - while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress)) - udelay(1000); -#else - while (monarch_cpu != -1) - cpu_relax(); /* spin until monarch leaves */ -#endif - - NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1); - - mprintk("Slave on cpu %d returning to normal service.\n", cpu); - ia64_set_curr_task(cpu, previous_current); - ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; - atomic_dec(&slaves); - return; - } - - monarch_cpu = cpu; - NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1); - - /* - * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be - * generated via the BMC's command-line interface, but since the console is on the - * same serial line, the user will need some time to switch out of the BMC before - * the dump begins. - */ - mprintk("Delaying for 5 seconds...\n"); - udelay(5*1000000); - ia64_wait_for_slaves(cpu, "INIT"); - /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through - * to default_monarch_init_process() above and just print all the - * tasks. - */ - NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1); - NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1); - - mprintk("\nINIT dump complete. 
Monarch on cpu %d returning to normal service.\n", cpu); - atomic_dec(&monarchs); - ia64_set_curr_task(cpu, previous_current); - monarch_cpu = -1; - return; -} - -static int __init -ia64_mca_disable_cpe_polling(char *str) -{ - cpe_poll_enabled = 0; - return 1; -} - -__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); - -/* Minimal format of the MCA/INIT stacks. The pseudo processes that run on - * these stacks can never sleep, they cannot return from the kernel to user - * space, they do not appear in a normal ps listing. So there is no need to - * format most of the fields. - */ - -static void -format_mca_init_stack(void *mca_data, unsigned long offset, - const char *type, int cpu) -{ - struct task_struct *p = (struct task_struct *)((char *)mca_data + offset); - struct thread_info *ti; - memset(p, 0, KERNEL_STACK_SIZE); - ti = task_thread_info(p); - ti->flags = _TIF_MCA_INIT; - ti->preempt_count = 1; - ti->task = p; - ti->cpu = cpu; - p->stack = ti; - p->__state = TASK_UNINTERRUPTIBLE; - cpumask_set_cpu(cpu, &p->cpus_mask); - INIT_LIST_HEAD(&p->tasks); - p->parent = p->real_parent = p->group_leader = p; - INIT_LIST_HEAD(&p->children); - INIT_LIST_HEAD(&p->sibling); - strscpy(p->comm, type, sizeof(p->comm)-1); -} - -/* Caller prevents this from being called after init */ -static void * __ref mca_bootmem(void) -{ - return memblock_alloc(sizeof(struct ia64_mca_cpu), KERNEL_STACK_SIZE); -} - -/* Do per-CPU MCA-related initialization. */ -void -ia64_mca_cpu_init(void *cpu_data) -{ - void *pal_vaddr; - void *data; - long sz = sizeof(struct ia64_mca_cpu); - int cpu = smp_processor_id(); - static int first_time = 1; - - /* - * Structure will already be allocated if cpu has been online, - * then offlined. 
- */ - if (__per_cpu_mca[cpu]) { - data = __va(__per_cpu_mca[cpu]); - } else { - if (first_time) { - data = mca_bootmem(); - first_time = 0; - } else - data = (void *)__get_free_pages(GFP_ATOMIC, - get_order(sz)); - if (!data) - panic("Could not allocate MCA memory for cpu %d\n", - cpu); - } - format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack), - "MCA", cpu); - format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack), - "INIT", cpu); - __this_cpu_write(ia64_mca_data, (__per_cpu_mca[cpu] = __pa(data))); - - /* - * Stash away a copy of the PTE needed to map the per-CPU page. - * We may need it during MCA recovery. - */ - __this_cpu_write(ia64_mca_per_cpu_pte, - pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL))); - - /* - * Also, stash away a copy of the PAL address and the PTE - * needed to map it. - */ - pal_vaddr = efi_get_pal_addr(); - if (!pal_vaddr) - return; - __this_cpu_write(ia64_mca_pal_base, - GRANULEROUNDDOWN((unsigned long) pal_vaddr)); - __this_cpu_write(ia64_mca_pal_pte, pte_val(mk_pte_phys(__pa(pal_vaddr), - PAGE_KERNEL))); -} - -static int ia64_mca_cpu_online(unsigned int cpu) -{ - unsigned long flags; - - local_irq_save(flags); - if (!cmc_polling_enabled) - ia64_mca_cmc_vector_enable(NULL); - local_irq_restore(flags); - return 0; -} - -/* - * ia64_mca_init - * - * Do all the system level mca specific initialization. - * - * 1. Register spinloop and wakeup request interrupt vectors - * - * 2. Register OS_MCA handler entry point - * - * 3. Register OS_INIT handler entry point - * - * 4. Initialize MCA/CMC/INIT related log buffers maintained by the OS. - * - * Note that this initialization is done very early before some kernel - * services are available. 
- * - * Inputs : None - * - * Outputs : None - */ -void __init -ia64_mca_init(void) -{ - ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch; - ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave; - ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; - int i; - long rc; - struct ia64_sal_retval isrv; - unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ - static struct notifier_block default_init_monarch_nb = { - .notifier_call = default_monarch_init_process, - .priority = 0/* we need to notified last */ - }; - - IA64_MCA_DEBUG("%s: begin\n", __func__); - - /* Clear the Rendez checkin flag for all cpus */ - for(i = 0 ; i < NR_CPUS; i++) - ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; - - /* - * Register the rendezvous spinloop and wakeup mechanism with SAL - */ - - /* Register the rendezvous interrupt vector with SAL */ - while (1) { - isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, - SAL_MC_PARAM_MECHANISM_INT, - IA64_MCA_RENDEZ_VECTOR, - timeout, - SAL_MC_PARAM_RZ_ALWAYS); - rc = isrv.status; - if (rc == 0) - break; - if (rc == -2) { - printk(KERN_INFO "Increasing MCA rendezvous timeout from " - "%ld to %ld milliseconds\n", timeout, isrv.v0); - timeout = isrv.v0; - NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0); - continue; - } - printk(KERN_ERR "Failed to register rendezvous interrupt " - "with SAL (status %ld)\n", rc); - return; - } - - /* Register the wakeup interrupt vector with SAL */ - isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, - SAL_MC_PARAM_MECHANISM_INT, - IA64_MCA_WAKEUP_VECTOR, - 0, 0); - rc = isrv.status; - if (rc) { - printk(KERN_ERR "Failed to register wakeup interrupt with SAL " - "(status %ld)\n", rc); - return; - } - - IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __func__); - - ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp); - /* - * XXX - disable SAL checksum by 
setting size to 0; should be - * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch); - */ - ia64_mc_info.imi_mca_handler_size = 0; - - /* Register the os mca handler with SAL */ - if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, - ia64_mc_info.imi_mca_handler, - ia64_tpa(mca_hldlr_ptr->gp), - ia64_mc_info.imi_mca_handler_size, - 0, 0, 0))) - { - printk(KERN_ERR "Failed to register OS MCA handler with SAL " - "(status %ld)\n", rc); - return; - } - - IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __func__, - ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp)); - - /* - * XXX - disable SAL checksum by setting size to 0, should be - * size of the actual init handler in mca_asm.S. - */ - ia64_mc_info.imi_monarch_init_handler = ia64_tpa(init_hldlr_ptr_monarch->fp); - ia64_mc_info.imi_monarch_init_handler_size = 0; - ia64_mc_info.imi_slave_init_handler = ia64_tpa(init_hldlr_ptr_slave->fp); - ia64_mc_info.imi_slave_init_handler_size = 0; - - IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __func__, - ia64_mc_info.imi_monarch_init_handler); - - /* Register the os init handler with SAL */ - if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, - ia64_mc_info.imi_monarch_init_handler, - ia64_tpa(ia64_getreg(_IA64_REG_GP)), - ia64_mc_info.imi_monarch_init_handler_size, - ia64_mc_info.imi_slave_init_handler, - ia64_tpa(ia64_getreg(_IA64_REG_GP)), - ia64_mc_info.imi_slave_init_handler_size))) - { - printk(KERN_ERR "Failed to register m/s INIT handlers with SAL " - "(status %ld)\n", rc); - return; - } - if (register_die_notifier(&default_init_monarch_nb)) { - printk(KERN_ERR "Failed to register default monarch INIT process\n"); - return; - } - - IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__); - - /* Initialize the areas set aside by the OS to buffer the - * platform/processor error states for MCA/INIT/CMC - * handling. 
- */ - ia64_log_init(SAL_INFO_TYPE_MCA); - ia64_log_init(SAL_INFO_TYPE_INIT); - ia64_log_init(SAL_INFO_TYPE_CMC); - ia64_log_init(SAL_INFO_TYPE_CPE); - - mca_init = 1; - printk(KERN_INFO "MCA related initialization done\n"); -} - - -/* - * These pieces cannot be done in ia64_mca_init() because it is called before - * early_irq_init() which would wipe out our percpu irq registrations. But we - * cannot leave them until ia64_mca_late_init() because by then all the other - * processors have been brought online and have set their own CMC vectors to - * point at a non-existant action. Called from arch_early_irq_init(). - */ -void __init ia64_mca_irq_init(void) -{ - /* - * Configure the CMCI/P vector and handler. Interrupts for CMC are - * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c). - */ - register_percpu_irq(IA64_CMC_VECTOR, ia64_mca_cmc_int_handler, 0, - "cmc_hndlr"); - register_percpu_irq(IA64_CMCP_VECTOR, ia64_mca_cmc_int_caller, 0, - "cmc_poll"); - ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */ - - /* Setup the MCA rendezvous interrupt vector */ - register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, ia64_mca_rendez_int_handler, - 0, "mca_rdzv"); - - /* Setup the MCA wakeup interrupt vector */ - register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, ia64_mca_wakeup_int_handler, - 0, "mca_wkup"); - - /* Setup the CPEI/P handler */ - register_percpu_irq(IA64_CPEP_VECTOR, ia64_mca_cpe_int_caller, 0, - "cpe_poll"); -} - -/* - * ia64_mca_late_init - * - * Opportunity to setup things that require initialization later - * than ia64_mca_init. Setup a timer to poll for CPEs if the - * platform doesn't support an interrupt driven mechanism. 
- * - * Inputs : None - * Outputs : Status - */ -static int __init -ia64_mca_late_init(void) -{ - if (!mca_init) - return 0; - - /* Setup the CMCI/P vector and handler */ - timer_setup(&cmc_poll_timer, ia64_mca_cmc_poll, 0); - - /* Unmask/enable the vector */ - cmc_polling_enabled = 0; - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/mca:online", - ia64_mca_cpu_online, NULL); - IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__); - - /* Setup the CPEI/P vector and handler */ - cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); - timer_setup(&cpe_poll_timer, ia64_mca_cpe_poll, 0); - - { - unsigned int irq; - - if (cpe_vector >= 0) { - /* If platform supports CPEI, enable the irq. */ - irq = local_vector_to_irq(cpe_vector); - if (irq > 0) { - cpe_poll_enabled = 0; - irq_set_status_flags(irq, IRQ_PER_CPU); - if (request_irq(irq, ia64_mca_cpe_int_handler, - 0, "cpe_hndlr", NULL)) - pr_err("Failed to register cpe_hndlr interrupt\n"); - ia64_cpe_irq = irq; - ia64_mca_register_cpev(cpe_vector); - IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", - __func__); - return 0; - } - printk(KERN_ERR "%s: Failed to find irq for CPE " - "interrupt handler, vector %d\n", - __func__, cpe_vector); - } - /* If platform doesn't support CPEI, get the timer going. 
*/ - if (cpe_poll_enabled) { - ia64_mca_cpe_poll(0UL); - IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __func__); - } - } - - return 0; -} - -device_initcall(ia64_mca_late_init); diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S deleted file mode 100644 index 0d6b8cf9d1d0..000000000000 --- a/arch/ia64/kernel/mca_asm.S +++ /dev/null @@ -1,1123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * File: mca_asm.S - * Purpose: assembly portion of the IA64 MCA handling - * - * Mods by cfleck to integrate into kernel build - * - * 2000-03-15 David Mosberger-Tang - * Added various stop bits to get a clean compile - * - * 2000-03-29 Chuck Fleckenstein - * Added code to save INIT handoff state in pt_regs format, - * switch to temp kstack, switch modes, jump to C INIT handler - * - * 2002-01-04 J.Hall - * Before entering virtual mode code: - * 1. Check for TLB CPU error - * 2. Restore current thread pointer to kr6 - * 3. Move stack ptr 16 bytes to conform to C calling convention - * - * 2004-11-12 Russ Anderson - * Added per cpu MCA/INIT stack save areas. - * - * 2005-12-08 Keith Owens - * Use per cpu MCA/INIT stacks for all data. - */ -#include -#include - -#include -#include -#include -#include - -#include "entry.h" - -#define GET_IA64_MCA_DATA(reg) \ - GET_THIS_PADDR(reg, ia64_mca_data) \ - ;; \ - ld8 reg=[reg] - - .global ia64_do_tlb_purge - .global ia64_os_mca_dispatch - .global ia64_os_init_on_kdump - .global ia64_os_init_dispatch_monarch - .global ia64_os_init_dispatch_slave - - .text - .align 16 - -//StartMain//////////////////////////////////////////////////////////////////// - -/* - * Just the TLB purge part is moved to a separate function - * so we can re-use the code for cpu hotplug code as well - * Caller should now setup b1, so we can branch once the - * tlb flush is complete. 
- */ - -ia64_do_tlb_purge: -#define O(member) IA64_CPUINFO_##member##_OFFSET - - GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 - ;; - addl r17=O(PTCE_STRIDE),r2 - addl r2=O(PTCE_BASE),r2 - ;; - ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base - ld4 r19=[r2],4 // r19=ptce_count[0] - ld4 r21=[r17],4 // r21=ptce_stride[0] - ;; - ld4 r20=[r2] // r20=ptce_count[1] - ld4 r22=[r17] // r22=ptce_stride[1] - mov r24=0 - ;; - adds r20=-1,r20 - ;; -#undef O - -2: - cmp.ltu p6,p7=r24,r19 -(p7) br.cond.dpnt.few 4f - mov ar.lc=r20 -3: - ptc.e r18 - ;; - add r18=r22,r18 - br.cloop.sptk.few 3b - ;; - add r18=r21,r18 - add r24=1,r24 - ;; - br.sptk.few 2b -4: - srlz.i // srlz.i implies srlz.d - ;; - - // Now purge addresses formerly mapped by TR registers - // 1. Purge ITR&DTR for kernel. - movl r16=KERNEL_START - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - ;; - ptr.i r16, r18 - ptr.d r16, r18 - ;; - srlz.i - ;; - srlz.d - ;; - // 3. Purge ITR for PAL code. - GET_THIS_PADDR(r2, ia64_mca_pal_base) - ;; - ld8 r16=[r2] - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.i r16,r18 - ;; - srlz.i - ;; - // 4. Purge DTR for stack. - mov r16=IA64_KR(CURRENT_STACK) - ;; - shl r16=r16,IA64_GRANULE_SHIFT - movl r19=PAGE_OFFSET - ;; - add r16=r19,r16 - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.i - ;; - // Now branch away to caller. 
- br.sptk.many b1 - ;; - -//EndMain////////////////////////////////////////////////////////////////////// - -//StartMain//////////////////////////////////////////////////////////////////// - -ia64_os_mca_dispatch: - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - mov r19=1 // All MCA events are treated as monarch (for now) - br.sptk ia64_state_save // save the state that is not in minstate -1: - - GET_IA64_MCA_DATA(r2) - // Using MCA stack, struct ia64_sal_os_state, variable proc_state_param - ;; - add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2 - ;; - ld8 r18=[r3] // Get processor state parameter on existing PALE_CHECK. - ;; - tbit.nz p6,p7=r18,60 -(p7) br.spnt done_tlb_purge_and_reload - - // The following code purges TC and TR entries. Then reload all TC entries. - // Purge percpu data TC entries. -begin_tlb_purge_and_reload: - movl r18=ia64_reload_tr;; - LOAD_PHYSICAL(p0,r18,ia64_reload_tr);; - mov b1=r18;; - br.sptk.many ia64_do_tlb_purge;; - -ia64_reload_tr: - // Finally reload the TR registers. - // 1. Reload DTR/ITR registers for kernel. - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - movl r17=KERNEL_START - ;; - mov cr.itir=r18 - mov cr.ifa=r17 - mov r16=IA64_TR_KERNEL - mov r19=ip - movl r18=PAGE_KERNEL - ;; - dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT - ;; - or r18=r17,r18 - ;; - itr.i itr[r16]=r18 - ;; - itr.d dtr[r16]=r18 - ;; - srlz.i - srlz.d - ;; - // 3. Reload ITR for PAL code. - GET_THIS_PADDR(r2, ia64_mca_pal_pte) - ;; - ld8 r18=[r2] // load PAL PTE - ;; - GET_THIS_PADDR(r2, ia64_mca_pal_base) - ;; - ld8 r16=[r2] // load PAL vaddr - mov r19=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r19 - mov cr.ifa=r16 - mov r20=IA64_TR_PALCODE - ;; - itr.i itr[r20]=r18 - ;; - srlz.i - ;; - // 4. Reload DTR for stack. 
- mov r16=IA64_KR(CURRENT_STACK) - ;; - shl r16=r16,IA64_GRANULE_SHIFT - movl r19=PAGE_OFFSET - ;; - add r18=r19,r16 - movl r20=PAGE_KERNEL - ;; - add r16=r20,r16 - mov r19=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r19 - mov cr.ifa=r18 - mov r20=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r20]=r16 - GET_THIS_PADDR(r2, ia64_mca_tr_reload) - mov r18 = 1 - ;; - srlz.d - ;; - st8 [r2] =r18 - ;; - -done_tlb_purge_and_reload: - - // switch to per cpu MCA stack - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_new_stack -1: - - // everything saved, now we can set the kernel registers - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_set_kernel_registers -1: - - // This must be done in physical mode - GET_IA64_MCA_DATA(r2) - ;; - mov r7=r2 - - // Enter virtual mode from physical mode - VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) - - // This code returns to SAL via SOS r2, in general SAL has no unwind - // data. To get a clean termination when backtracing the C MCA/INIT - // handler, set a dummy return address of 0 in this routine. That - // requires that ia64_os_mca_virtual_begin be a global function. 
-ENTRY(ia64_os_mca_virtual_begin) - .prologue - .save rp,r0 - .body - - mov ar.rsc=3 // set eager mode for C handler - mov r2=r7 // see GET_IA64_MCA_DATA above - ;; - - // Call virtual mode handler - alloc r14=ar.pfs,0,0,3,0 - ;; - DATA_PA_TO_VA(r2,r7) - ;; - add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 - add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 - add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2 - br.call.sptk.many b0=ia64_mca_handler - - // Revert back to physical mode before going back to SAL - PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4) -ia64_os_mca_virtual_end: - -END(ia64_os_mca_virtual_begin) - - // switch back to previous stack - alloc r14=ar.pfs,0,0,0,0 // remove the MCA handler frame - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_old_stack -1: - - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_state_restore // restore the SAL state -1: - - mov b0=r12 // SAL_CHECK return address - - br b0 - -//EndMain////////////////////////////////////////////////////////////////////// - -//StartMain//////////////////////////////////////////////////////////////////// - -// -// NOP init handler for kdump. In panic situation, we may receive INIT -// while kernel transition. Since we initialize registers on leave from -// current kernel, no longer monarch/slave handlers of current kernel in -// virtual mode are called safely. -// We can unregister these init handlers from SAL, however then the INIT -// will result in warmboot by SAL and we cannot retrieve the crashdump. -// Therefore register this NOP function to SAL, to prevent entering virtual -// mode and resulting warmboot by SAL. 
-// -ia64_os_init_on_kdump: - mov r8=r0 // IA64_INIT_RESUME - mov r9=r10 // SAL_GP - mov r22=r17 // *minstate - ;; - mov r10=r0 // return to same context - mov b0=r12 // SAL_CHECK return address - br b0 - -// -// SAL to OS entry point for INIT on all processors. This has been defined for -// registration purposes with SAL as a part of ia64_mca_init. Monarch and -// slave INIT have identical processing, except for the value of the -// sos->monarch flag in r19. -// - -ia64_os_init_dispatch_monarch: - mov r19=1 // Bow, bow, ye lower middle classes! - br.sptk ia64_os_init_dispatch - -ia64_os_init_dispatch_slave: - mov r19=0 // yeth, mathter - -ia64_os_init_dispatch: - - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_state_save // save the state that is not in minstate -1: - - // switch to per cpu INIT stack - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_new_stack -1: - - // everything saved, now we can set the kernel registers - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_set_kernel_registers -1: - - // This must be done in physical mode - GET_IA64_MCA_DATA(r2) - ;; - mov r7=r2 - - // Enter virtual mode from physical mode - VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4) - - // This code returns to SAL via SOS r2, in general SAL has no unwind - // data. To get a clean termination when backtracing the C MCA/INIT - // handler, set a dummy return address of 0 in this routine. That - // requires that ia64_os_init_virtual_begin be a global function. 
-ENTRY(ia64_os_init_virtual_begin) - .prologue - .save rp,r0 - .body - - mov ar.rsc=3 // set eager mode for C handler - mov r2=r7 // see GET_IA64_MCA_DATA above - ;; - - // Call virtual mode handler - alloc r14=ar.pfs,0,0,3,0 - ;; - DATA_PA_TO_VA(r2,r7) - ;; - add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 - add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 - add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2 - br.call.sptk.many b0=ia64_init_handler - - // Revert back to physical mode before going back to SAL - PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4) -ia64_os_init_virtual_end: - -END(ia64_os_init_virtual_begin) - - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_state_restore // restore the SAL state -1: - - // switch back to previous stack - alloc r14=ar.pfs,0,0,0,0 // remove the INIT handler frame - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_old_stack -1: - - mov b0=r12 // SAL_CHECK return address - br b0 - -//EndMain////////////////////////////////////////////////////////////////////// - -// common defines for the stubs -#define ms r4 -#define regs r5 -#define temp1 r2 /* careful, it overlaps with input registers */ -#define temp2 r3 /* careful, it overlaps with input registers */ -#define temp3 r7 -#define temp4 r14 - - -//++ -// Name: -// ia64_state_save() -// -// Stub Description: -// -// Save the state that is not in minstate. This is sensitive to the layout of -// struct ia64_sal_os_state in mca.h. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// The OS to SAL section of struct ia64_sal_os_state is set to a default -// value of cold boot (MCA) or warm boot (INIT) and return to the same -// context. 
ia64_sal_os_state is also used to hold some registers that -// need to be saved and restored across the stack switches. -// -// Most input registers to this stub come from PAL/SAL -// r1 os gp, physical -// r8 pal_proc entry point -// r9 sal_proc entry point -// r10 sal gp -// r11 MCA - rendevzous state, INIT - reason code -// r12 sal return address -// r17 pal min_state -// r18 processor state parameter -// r19 monarch flag, set by the caller of this routine -// -// In addition to the SAL to OS state, this routine saves all the -// registers that appear in struct pt_regs and struct switch_stack, -// excluding those that are already in the PAL minstate area. This -// results in a partial pt_regs and switch_stack, the C code copies the -// remaining registers from PAL minstate to pt_regs and switch_stack. The -// resulting structures contain all the state of the original process when -// MCA/INIT occurred. -// -//-- - -ia64_state_save: - add regs=MCA_SOS_OFFSET, r3 - add ms=MCA_SOS_OFFSET+8, r3 - mov b0=r2 // save return address - cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3 - ;; - GET_IA64_MCA_DATA(temp2) - ;; - add temp1=temp2, regs // struct ia64_sal_os_state on MCA or INIT stack - add temp2=temp2, ms // struct ia64_sal_os_state+8 on MCA or INIT stack - ;; - mov regs=temp1 // save the start of sos - st8 [temp1]=r1,16 // os_gp - st8 [temp2]=r8,16 // pal_proc - ;; - st8 [temp1]=r9,16 // sal_proc - st8 [temp2]=r11,16 // rv_rc - mov r11=cr.iipa - ;; - st8 [temp1]=r18 // proc_state_param - st8 [temp2]=r19 // monarch - mov r6=IA64_KR(CURRENT) - add temp1=SOS(SAL_RA), regs - add temp2=SOS(SAL_GP), regs - ;; - st8 [temp1]=r12,16 // sal_ra - st8 [temp2]=r10,16 // sal_gp - mov r12=cr.isr - ;; - st8 [temp1]=r17,16 // pal_min_state - st8 [temp2]=r6,16 // prev_IA64_KR_CURRENT - mov r6=IA64_KR(CURRENT_STACK) - ;; - st8 [temp1]=r6,16 // prev_IA64_KR_CURRENT_STACK - st8 [temp2]=r0,16 // prev_task, starts off as NULL - mov r6=cr.ifa - ;; - st8 [temp1]=r12,16 // cr.isr - st8 
[temp2]=r6,16 // cr.ifa - mov r12=cr.itir - ;; - st8 [temp1]=r12,16 // cr.itir - st8 [temp2]=r11,16 // cr.iipa - mov r12=cr.iim - ;; - st8 [temp1]=r12 // cr.iim -(p1) mov r12=IA64_MCA_COLD_BOOT -(p2) mov r12=IA64_INIT_WARM_BOOT - mov r6=cr.iha - add temp1=SOS(OS_STATUS), regs - ;; - st8 [temp2]=r6 // cr.iha - add temp2=SOS(CONTEXT), regs - st8 [temp1]=r12 // os_status, default is cold boot - mov r6=IA64_MCA_SAME_CONTEXT - ;; - st8 [temp2]=r6 // context, default is same context - - // Save the pt_regs data that is not in minstate. The previous code - // left regs at sos. - add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs - ;; - add temp1=PT(B6), regs - mov temp3=b6 - mov temp4=b7 - add temp2=PT(B7), regs - ;; - st8 [temp1]=temp3,PT(AR_CSD)-PT(B6) // save b6 - st8 [temp2]=temp4,PT(AR_SSD)-PT(B7) // save b7 - mov temp3=ar.csd - mov temp4=ar.ssd - cover // must be last in group - ;; - st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD) // save ar.csd - st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD) // save ar.ssd - mov temp3=ar.unat - mov temp4=ar.pfs - ;; - st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT) // save ar.unat - st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS) // save ar.pfs - mov temp3=ar.rnat - mov temp4=ar.bspstore - ;; - st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT) // save ar.rnat - st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE) // save ar.bspstore - mov temp3=ar.bsp - ;; - sub temp3=temp3, temp4 // ar.bsp - ar.bspstore - mov temp4=ar.fpsr - ;; - shl temp3=temp3,16 // compute ar.rsc to be used for "loadrs" - ;; - st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS) // save loadrs - st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR) // save ar.fpsr - mov temp3=ar.ccv - ;; - st8 [temp1]=temp3,PT(F7)-PT(AR_CCV) // save ar.ccv - stf.spill [temp2]=f6,PT(F8)-PT(F6) - ;; - stf.spill [temp1]=f7,PT(F9)-PT(F7) - stf.spill [temp2]=f8,PT(F10)-PT(F8) - ;; - stf.spill [temp1]=f9,PT(F11)-PT(F9) - stf.spill [temp2]=f10 - ;; - stf.spill [temp1]=f11 - - // Save the switch_stack data that is not in minstate nor pt_regs. 
The - // previous code left regs at pt_regs. - add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs - ;; - add temp1=SW(F2), regs - add temp2=SW(F3), regs - ;; - stf.spill [temp1]=f2,32 - stf.spill [temp2]=f3,32 - ;; - stf.spill [temp1]=f4,32 - stf.spill [temp2]=f5,32 - ;; - stf.spill [temp1]=f12,32 - stf.spill [temp2]=f13,32 - ;; - stf.spill [temp1]=f14,32 - stf.spill [temp2]=f15,32 - ;; - stf.spill [temp1]=f16,32 - stf.spill [temp2]=f17,32 - ;; - stf.spill [temp1]=f18,32 - stf.spill [temp2]=f19,32 - ;; - stf.spill [temp1]=f20,32 - stf.spill [temp2]=f21,32 - ;; - stf.spill [temp1]=f22,32 - stf.spill [temp2]=f23,32 - ;; - stf.spill [temp1]=f24,32 - stf.spill [temp2]=f25,32 - ;; - stf.spill [temp1]=f26,32 - stf.spill [temp2]=f27,32 - ;; - stf.spill [temp1]=f28,32 - stf.spill [temp2]=f29,32 - ;; - stf.spill [temp1]=f30,SW(B2)-SW(F30) - stf.spill [temp2]=f31,SW(B3)-SW(F31) - mov temp3=b2 - mov temp4=b3 - ;; - st8 [temp1]=temp3,16 // save b2 - st8 [temp2]=temp4,16 // save b3 - mov temp3=b4 - mov temp4=b5 - ;; - st8 [temp1]=temp3,SW(AR_LC)-SW(B4) // save b4 - st8 [temp2]=temp4 // save b5 - mov temp3=ar.lc - ;; - st8 [temp1]=temp3 // save ar.lc - - // FIXME: Some proms are incorrectly accessing the minstate area as - // cached data. The C code uses region 6, uncached virtual. Ensure - // that there is no cache data lying around for the first 1K of the - // minstate area. - // Remove this code in September 2006, that gives platforms a year to - // fix their proms and get their customers updated. 
- - add r1=32*1,r17 - add r2=32*2,r17 - add r3=32*3,r17 - add r4=32*4,r17 - add r5=32*5,r17 - add r6=32*6,r17 - add r7=32*7,r17 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - add r17=32*8,r17 - add r1=32*8,r1 - add r2=32*8,r2 - add r3=32*8,r3 - add r4=32*8,r4 - add r5=32*8,r5 - add r6=32*8,r6 - add r7=32*8,r7 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - add r17=32*8,r17 - add r1=32*8,r1 - add r2=32*8,r2 - add r3=32*8,r3 - add r4=32*8,r4 - add r5=32*8,r5 - add r6=32*8,r6 - add r7=32*8,r7 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - add r17=32*8,r17 - add r1=32*8,r1 - add r2=32*8,r2 - add r3=32*8,r3 - add r4=32*8,r4 - add r5=32*8,r5 - add r6=32*8,r6 - add r7=32*8,r7 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_state_restore() -// -// Stub Description: -// -// Restore the SAL/OS state. This is sensitive to the layout of struct -// ia64_sal_os_state in mca.h. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// In addition to the SAL to OS state, this routine restores all the -// registers that appear in struct pt_regs and struct switch_stack, -// excluding those in the PAL minstate area. -// -//-- - -ia64_state_restore: - // Restore the switch_stack data that is not in minstate nor pt_regs. 
- add regs=MCA_SWITCH_STACK_OFFSET, r3 - mov b0=r2 // save return address - ;; - GET_IA64_MCA_DATA(temp2) - ;; - add regs=temp2, regs - ;; - add temp1=SW(F2), regs - add temp2=SW(F3), regs - ;; - ldf.fill f2=[temp1],32 - ldf.fill f3=[temp2],32 - ;; - ldf.fill f4=[temp1],32 - ldf.fill f5=[temp2],32 - ;; - ldf.fill f12=[temp1],32 - ldf.fill f13=[temp2],32 - ;; - ldf.fill f14=[temp1],32 - ldf.fill f15=[temp2],32 - ;; - ldf.fill f16=[temp1],32 - ldf.fill f17=[temp2],32 - ;; - ldf.fill f18=[temp1],32 - ldf.fill f19=[temp2],32 - ;; - ldf.fill f20=[temp1],32 - ldf.fill f21=[temp2],32 - ;; - ldf.fill f22=[temp1],32 - ldf.fill f23=[temp2],32 - ;; - ldf.fill f24=[temp1],32 - ldf.fill f25=[temp2],32 - ;; - ldf.fill f26=[temp1],32 - ldf.fill f27=[temp2],32 - ;; - ldf.fill f28=[temp1],32 - ldf.fill f29=[temp2],32 - ;; - ldf.fill f30=[temp1],SW(B2)-SW(F30) - ldf.fill f31=[temp2],SW(B3)-SW(F31) - ;; - ld8 temp3=[temp1],16 // restore b2 - ld8 temp4=[temp2],16 // restore b3 - ;; - mov b2=temp3 - mov b3=temp4 - ld8 temp3=[temp1],SW(AR_LC)-SW(B4) // restore b4 - ld8 temp4=[temp2] // restore b5 - ;; - mov b4=temp3 - mov b5=temp4 - ld8 temp3=[temp1] // restore ar.lc - ;; - mov ar.lc=temp3 - - // Restore the pt_regs data that is not in minstate. The previous code - // left regs at switch_stack. - add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs - ;; - add temp1=PT(B6), regs - add temp2=PT(B7), regs - ;; - ld8 temp3=[temp1],PT(AR_CSD)-PT(B6) // restore b6 - ld8 temp4=[temp2],PT(AR_SSD)-PT(B7) // restore b7 - ;; - mov b6=temp3 - mov b7=temp4 - ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD) // restore ar.csd - ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD) // restore ar.ssd - ;; - mov ar.csd=temp3 - mov ar.ssd=temp4 - ld8 temp3=[temp1] // restore ar.unat - add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1 - ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS) // restore ar.pfs - ;; - mov ar.unat=temp3 - mov ar.pfs=temp4 - // ar.rnat, ar.bspstore, loadrs are restore in ia64_old_stack. 
- ld8 temp3=[temp1],PT(F6)-PT(AR_CCV) // restore ar.ccv - ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR) // restore ar.fpsr - ;; - mov ar.ccv=temp3 - mov ar.fpsr=temp4 - ldf.fill f6=[temp1],PT(F8)-PT(F6) - ldf.fill f7=[temp2],PT(F9)-PT(F7) - ;; - ldf.fill f8=[temp1],PT(F10)-PT(F8) - ldf.fill f9=[temp2],PT(F11)-PT(F9) - ;; - ldf.fill f10=[temp1] - ldf.fill f11=[temp2] - - // Restore the SAL to OS state. The previous code left regs at pt_regs. - add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs - ;; - add temp1=SOS(SAL_RA), regs - add temp2=SOS(SAL_GP), regs - ;; - ld8 r12=[temp1],16 // sal_ra - ld8 r9=[temp2],16 // sal_gp - ;; - ld8 r22=[temp1],16 // pal_min_state, virtual - ld8 r13=[temp2],16 // prev_IA64_KR_CURRENT - ;; - ld8 r16=[temp1],16 // prev_IA64_KR_CURRENT_STACK - ld8 r20=[temp2],16 // prev_task - ;; - ld8 temp3=[temp1],16 // cr.isr - ld8 temp4=[temp2],16 // cr.ifa - ;; - mov cr.isr=temp3 - mov cr.ifa=temp4 - ld8 temp3=[temp1],16 // cr.itir - ld8 temp4=[temp2],16 // cr.iipa - ;; - mov cr.itir=temp3 - mov cr.iipa=temp4 - ld8 temp3=[temp1] // cr.iim - ld8 temp4=[temp2] // cr.iha - add temp1=SOS(OS_STATUS), regs - add temp2=SOS(CONTEXT), regs - ;; - mov cr.iim=temp3 - mov cr.iha=temp4 - dep r22=0,r22,62,1 // pal_min_state, physical, uncached - mov IA64_KR(CURRENT)=r13 - ld8 r8=[temp1] // os_status - ld8 r10=[temp2] // context - - /* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to. To - * avoid any dependencies on the algorithm in ia64_switch_to(), just - * purge any existing CURRENT_STACK mapping and insert the new one. - * - * r16 contains prev_IA64_KR_CURRENT_STACK, r13 contains - * prev_IA64_KR_CURRENT, these values may have been changed by the C - * code. Do not use r8, r9, r10, r22, they contain values ready for - * the return to SAL. 
- */ - - mov r15=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK - ;; - shl r15=r15,IA64_GRANULE_SHIFT - ;; - dep r15=-1,r15,61,3 // virtual granule - mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps - ;; - ptr.d r15,r18 - ;; - srlz.d - - extr.u r19=r13,61,3 // r13 = prev_IA64_KR_CURRENT - shl r20=r16,IA64_GRANULE_SHIFT // r16 = prev_IA64_KR_CURRENT_STACK - movl r21=PAGE_KERNEL // page properties - ;; - mov IA64_KR(CURRENT_STACK)=r16 - cmp.ne p6,p0=RGN_KERNEL,r19 // new stack is in the kernel region? - or r21=r20,r21 // construct PA | page properties -(p6) br.spnt 1f // the dreaded cpu 0 idle task in region 5:( - ;; - mov cr.itir=r18 - mov cr.ifa=r13 - mov r20=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r20]=r21 - ;; - srlz.d -1: - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_new_stack() -// -// Stub Description: -// -// Switch to the MCA/INIT stack. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// On entry RBS is still on the original stack, this routine switches RBS -// to use the MCA/INIT stack. -// -// On entry, sos->pal_min_state is physical, on exit it is virtual. -// -//-- - -ia64_new_stack: - add regs=MCA_PT_REGS_OFFSET, r3 - add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3 - mov b0=r2 // save return address - GET_IA64_MCA_DATA(temp1) - invala - ;; - add temp2=temp2, temp1 // struct ia64_sal_os_state.pal_min_state on MCA or INIT stack - add regs=regs, temp1 // struct pt_regs on MCA or INIT stack - ;; - // Address of minstate area provided by PAL is physical, uncacheable. - // Convert to Linux virtual address in region 6 for C code. 
- ld8 ms=[temp2] // pal_min_state, physical - ;; - dep temp1=-1,ms,62,2 // set region 6 - mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET - ;; - st8 [temp2]=temp1 // pal_min_state, virtual - - add temp4=temp3, regs // start of bspstore on new stack - ;; - mov ar.bspstore=temp4 // switch RBS to MCA/INIT stack - ;; - flushrs // must be first in group - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_old_stack() -// -// Stub Description: -// -// Switch to the old stack. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// On entry, pal_min_state is virtual, on exit it is physical. -// -// On entry RBS is on the MCA/INIT stack, this routine switches RBS -// back to the previous stack. -// -// The psr is set to all zeroes. SAL return requires either all zeroes or -// just psr.mc set. Leaving psr.mc off allows INIT to be issued if this -// code does not perform correctly. -// -// The dirty registers at the time of the event were flushed to the -// MCA/INIT stack in ia64_pt_regs_save(). Restore the dirty registers -// before reverting to the previous bspstore. 
-//-- - -ia64_old_stack: - add regs=MCA_PT_REGS_OFFSET, r3 - mov b0=r2 // save return address - GET_IA64_MCA_DATA(temp2) - LOAD_PHYSICAL(p0,temp1,1f) - ;; - mov cr.ipsr=r0 - mov cr.ifs=r0 - mov cr.iip=temp1 - ;; - invala - rfi -1: - - add regs=regs, temp2 // struct pt_regs on MCA or INIT stack - ;; - add temp1=PT(LOADRS), regs - ;; - ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS) // restore loadrs - ;; - ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE) // restore ar.bspstore - mov ar.rsc=temp2 - ;; - loadrs - ld8 temp4=[temp1] // restore ar.rnat - ;; - mov ar.bspstore=temp3 // back to old stack - ;; - mov ar.rnat=temp4 - ;; - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_set_kernel_registers() -// -// Stub Description: -// -// Set the registers that are required by the C code in order to run on an -// MCA/INIT stack. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -//-- - -ia64_set_kernel_registers: - add temp3=MCA_SP_OFFSET, r3 - mov b0=r2 // save return address - GET_IA64_MCA_DATA(temp1) - ;; - add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack - add r13=temp1, r3 // set current to start of MCA/INIT stack - add r20=temp1, r3 // physical start of MCA/INIT stack - ;; - DATA_PA_TO_VA(r12,temp2) - DATA_PA_TO_VA(r13,temp3) - ;; - mov IA64_KR(CURRENT)=r13 - - /* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack. To avoid - * any dependencies on the algorithm in ia64_switch_to(), just purge - * any existing CURRENT_STACK mapping and insert the new one. 
- */ - - mov r16=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK - ;; - shl r16=r16,IA64_GRANULE_SHIFT - ;; - dep r16=-1,r16,61,3 // virtual granule - mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps - ;; - ptr.d r16,r18 - ;; - srlz.d - - shr.u r16=r20,IA64_GRANULE_SHIFT // r20 = physical start of MCA/INIT stack - movl r21=PAGE_KERNEL // page properties - ;; - mov IA64_KR(CURRENT_STACK)=r16 - or r21=r20,r21 // construct PA | page properties - ;; - mov cr.itir=r18 - mov cr.ifa=r13 - mov r20=IA64_TR_CURRENT_STACK - - movl r17=FPSR_DEFAULT - ;; - mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value - ;; - itr.d dtr[r20]=r21 - ;; - srlz.d - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - -#undef ms -#undef regs -#undef temp1 -#undef temp2 -#undef temp3 -#undef temp4 - - -// Support function for mca.c, it is here to avoid using inline asm. Given the -// address of an rnat slot, if that address is below the current ar.bspstore -// then return the contents of that slot, otherwise return the contents of -// ar.rnat. -GLOBAL_ENTRY(ia64_get_rnat) - alloc r14=ar.pfs,1,0,0,0 - mov ar.rsc=0 - ;; - mov r14=ar.bspstore - ;; - cmp.lt p6,p7=in0,r14 - ;; -(p6) ld8 r8=[in0] -(p7) mov r8=ar.rnat - mov ar.rsc=3 - br.ret.sptk.many rp -END(ia64_get_rnat) - - -// void ia64_set_psr_mc(void) -// -// Set psr.mc bit to mask MCA/INIT. 
-GLOBAL_ENTRY(ia64_set_psr_mc) - rsm psr.i | psr.ic // disable interrupts - ;; - srlz.d - ;; - mov r14 = psr // get psr{36:35,31:0} - movl r15 = 1f - ;; - dep r14 = -1, r14, PSR_MC, 1 // set psr.mc - ;; - dep r14 = -1, r14, PSR_IC, 1 // set psr.ic - ;; - dep r14 = -1, r14, PSR_BN, 1 // keep bank1 in use - ;; - mov cr.ipsr = r14 - mov cr.ifs = r0 - mov cr.iip = r15 - ;; - rfi -1: - br.ret.sptk.many rp -END(ia64_set_psr_mc) diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c deleted file mode 100644 index 23c203639a96..000000000000 --- a/arch/ia64/kernel/mca_drv.c +++ /dev/null @@ -1,796 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * File: mca_drv.c - * Purpose: Generic MCA handling layer - * - * Copyright (C) 2004 FUJITSU LIMITED - * Copyright (C) 2004 Hidetoshi Seto - * Copyright (C) 2005 Silicon Graphics, Inc - * Copyright (C) 2005 Keith Owens - * Copyright (C) 2006 Russ Anderson - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "mca_drv.h" - -/* max size of SAL error record (default) */ -static int sal_rec_max = 10000; - -/* from mca_drv_asm.S */ -extern void *mca_handler_bhhook(void); - -static DEFINE_SPINLOCK(mca_bh_lock); - -typedef enum { - MCA_IS_LOCAL = 0, - MCA_IS_GLOBAL = 1 -} mca_type_t; - -#define MAX_PAGE_ISOLATE 1024 - -static struct page *page_isolate[MAX_PAGE_ISOLATE]; -static int num_page_isolate = 0; - -typedef enum { - ISOLATE_NG, - ISOLATE_OK, - ISOLATE_NONE -} isolate_status_t; - -typedef enum { - MCA_NOT_RECOVERED = 0, - MCA_RECOVERED = 1 -} recovery_status_t; - -/* - * This pool keeps pointers to the section part of SAL error record - */ -static struct { - slidx_list_t *buffer; /* section pointer list pool */ - int cur_idx; /* Current index of section pointer list pool */ - int max_idx; /* Maximum index of section pointer list 
pool */ -} slidx_pool; - -static int -fatal_mca(const char *fmt, ...) -{ - va_list args; - char buf[256]; - - va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); - va_end(args); - ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); - - return MCA_NOT_RECOVERED; -} - -static int -mca_recovered(const char *fmt, ...) -{ - va_list args; - char buf[256]; - - va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); - va_end(args); - ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); - - return MCA_RECOVERED; -} - -/** - * mca_page_isolate - isolate a poisoned page in order not to use it later - * @paddr: poisoned memory location - * - * Return value: - * one of isolate_status_t, ISOLATE_OK/NG/NONE. - */ - -static isolate_status_t -mca_page_isolate(unsigned long paddr) -{ - int i; - struct page *p; - - /* whether physical address is valid or not */ - if (!ia64_phys_addr_valid(paddr)) - return ISOLATE_NONE; - - if (!pfn_valid(paddr >> PAGE_SHIFT)) - return ISOLATE_NONE; - - /* convert physical address to physical page number */ - p = pfn_to_page(paddr>>PAGE_SHIFT); - - /* check whether a page number have been already registered or not */ - for (i = 0; i < num_page_isolate; i++) - if (page_isolate[i] == p) - return ISOLATE_OK; /* already listed */ - - /* limitation check */ - if (num_page_isolate == MAX_PAGE_ISOLATE) - return ISOLATE_NG; - - /* kick pages having attribute 'SLAB' or 'Reserved' */ - if (PageSlab(p) || PageReserved(p)) - return ISOLATE_NG; - - /* add attribute 'Reserved' and register the page */ - get_page(p); - SetPageReserved(p); - page_isolate[num_page_isolate++] = p; - - return ISOLATE_OK; -} - -/** - * mca_hanlder_bh - Kill the process which occurred memory read error - * @paddr: poisoned address received from MCA Handler - */ - -void -mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) -{ - ia64_mlogbuf_dump(); - printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " - "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", 
- raw_smp_processor_id(), current->pid, - from_kuid(&init_user_ns, current_uid()), - iip, ipsr, paddr, current->comm); - - spin_lock(&mca_bh_lock); - switch (mca_page_isolate(paddr)) { - case ISOLATE_OK: - printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); - break; - case ISOLATE_NG: - printk(KERN_CRIT "Page isolation: ( %lx ) failure.\n", paddr); - break; - default: - break; - } - spin_unlock(&mca_bh_lock); - - /* This process is about to be killed itself */ - make_task_dead(SIGKILL); -} - -/** - * mca_make_peidx - Make index of processor error section - * @slpi: pointer to record of processor error section - * @peidx: pointer to index of processor error section - */ - -static void -mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx) -{ - /* - * calculate the start address of - * "struct cpuid_info" and "sal_processor_static_info_t". - */ - u64 total_check_num = slpi->valid.num_cache_check - + slpi->valid.num_tlb_check - + slpi->valid.num_bus_check - + slpi->valid.num_reg_file_check - + slpi->valid.num_ms_check; - u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num - + sizeof(sal_log_processor_info_t); - u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info); - - peidx_head(peidx) = slpi; - peidx_mid(peidx) = (struct sal_cpuid_info *) - (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL); - peidx_bottom(peidx) = (sal_processor_static_info_t *) - (slpi->valid.psi_static_struct ? 
- ((char*)slpi + head_size + mid_size) : NULL); -} - -/** - * mca_make_slidx - Make index of SAL error record - * @buffer: pointer to SAL error record - * @slidx: pointer to index of SAL error record - * - * Return value: - * 1 if record has platform error / 0 if not - */ -#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \ - {slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \ - hl->hdr = ptr; \ - list_add(&hl->list, &(sect)); \ - slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; } - -static int -mca_make_slidx(void *buffer, slidx_table_t *slidx) -{ - int platform_err = 0; - int record_len = ((sal_log_record_header_t*)buffer)->len; - u32 ercd_pos; - int sects; - sal_log_section_hdr_t *sp; - - /* - * Initialize index referring current record - */ - INIT_LIST_HEAD(&(slidx->proc_err)); - INIT_LIST_HEAD(&(slidx->mem_dev_err)); - INIT_LIST_HEAD(&(slidx->sel_dev_err)); - INIT_LIST_HEAD(&(slidx->pci_bus_err)); - INIT_LIST_HEAD(&(slidx->smbios_dev_err)); - INIT_LIST_HEAD(&(slidx->pci_comp_err)); - INIT_LIST_HEAD(&(slidx->plat_specific_err)); - INIT_LIST_HEAD(&(slidx->host_ctlr_err)); - INIT_LIST_HEAD(&(slidx->plat_bus_err)); - INIT_LIST_HEAD(&(slidx->unsupported)); - - /* - * Extract a Record Header - */ - slidx->header = buffer; - - /* - * Extract each section records - * (arranged from "int ia64_log_platform_info_print()") - */ - for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0; - ercd_pos < record_len; ercd_pos += sp->len, sects++) { - sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos); - if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) { - LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp); - } else if (!efi_guidcmp(sp->guid, - SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) { - platform_err = 1; - LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp); - } else if (!efi_guidcmp(sp->guid, - SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) { - platform_err = 1; - LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp); - } else if (!efi_guidcmp(sp->guid, - 
SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) { - platform_err = 1; - LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp); - } else if (!efi_guidcmp(sp->guid, - SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) { - platform_err = 1; - LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp); - } else if (!efi_guidcmp(sp->guid, - SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) { - platform_err = 1; - LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp); - } else if (!efi_guidcmp(sp->guid, - SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) { - platform_err = 1; - LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp); - } else if (!efi_guidcmp(sp->guid, - SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) { - platform_err = 1; - LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp); - } else if (!efi_guidcmp(sp->guid, - SAL_PLAT_BUS_ERR_SECT_GUID)) { - platform_err = 1; - LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp); - } else { - LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp); - } - } - slidx->n_sections = sects; - - return platform_err; -} - -/** - * init_record_index_pools - Initialize pool of lists for SAL record index - * - * Return value: - * 0 on Success / -ENOMEM on Failure - */ -static int -init_record_index_pools(void) -{ - int i; - int rec_max_size; /* Maximum size of SAL error records */ - int sect_min_size; /* Minimum size of SAL error sections */ - /* minimum size table of each section */ - static int sal_log_sect_min_sizes[] = { - sizeof(sal_log_processor_info_t) - + sizeof(sal_processor_static_info_t), - sizeof(sal_log_mem_dev_err_info_t), - sizeof(sal_log_sel_dev_err_info_t), - sizeof(sal_log_pci_bus_err_info_t), - sizeof(sal_log_smbios_dev_err_info_t), - sizeof(sal_log_pci_comp_err_info_t), - sizeof(sal_log_plat_specific_err_info_t), - sizeof(sal_log_host_ctlr_err_info_t), - sizeof(sal_log_plat_bus_err_info_t), - }; - - /* - * MCA handler cannot allocate new memory on flight, - * so we preallocate enough memory to handle a SAL record. - * - * Initialize a handling set of slidx_pool: - * 1. Pick up the max size of SAL error records - * 2. 
Pick up the min size of SAL error sections - * 3. Allocate the pool as enough to 2 SAL records - * (now we can estimate the maxinum of section in a record.) - */ - - /* - 1 - */ - rec_max_size = sal_rec_max; - - /* - 2 - */ - sect_min_size = sal_log_sect_min_sizes[0]; - for (i = 1; i < ARRAY_SIZE(sal_log_sect_min_sizes); i++) - if (sect_min_size > sal_log_sect_min_sizes[i]) - sect_min_size = sal_log_sect_min_sizes[i]; - - /* - 3 - */ - slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1; - slidx_pool.buffer = - kmalloc_array(slidx_pool.max_idx, sizeof(slidx_list_t), - GFP_KERNEL); - - return slidx_pool.buffer ? 0 : -ENOMEM; -} - - -/***************************************************************************** - * Recovery functions * - *****************************************************************************/ - -/** - * is_mca_global - Check whether this MCA is global or not - * @peidx: pointer of index of processor error section - * @pbci: pointer to pal_bus_check_info_t - * @sos: pointer to hand off struct between SAL and OS - * - * Return value: - * MCA_IS_LOCAL / MCA_IS_GLOBAL - */ - -static mca_type_t -is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci, - struct ia64_sal_os_state *sos) -{ - pal_processor_state_info_t *psp = - (pal_processor_state_info_t*)peidx_psp(peidx); - - /* - * PAL can request a rendezvous, if the MCA has a global scope. - * If "rz_always" flag is set, SAL requests MCA rendezvous - * in spite of global MCA. - * Therefore it is local MCA when rendezvous has not been requested. - * Failed to rendezvous, the system must be down. - */ - switch (sos->rv_rc) { - case -1: /* SAL rendezvous unsuccessful */ - return MCA_IS_GLOBAL; - case 0: /* SAL rendezvous not required */ - return MCA_IS_LOCAL; - case 1: /* SAL rendezvous successful int */ - case 2: /* SAL rendezvous successful int with init */ - default: - break; - } - - /* - * If One or more Cache/TLB/Reg_File/Uarch_Check is here, - * it would be a local MCA. (i.e. 
processor internal error) - */ - if (psp->tc || psp->cc || psp->rc || psp->uc) - return MCA_IS_LOCAL; - - /* - * Bus_Check structure with Bus_Check.ib (internal bus error) flag set - * would be a global MCA. (e.g. a system bus address parity error) - */ - if (!pbci || pbci->ib) - return MCA_IS_GLOBAL; - - /* - * Bus_Check structure with Bus_Check.eb (external bus error) flag set - * could be either a local MCA or a global MCA. - * - * Referring Bus_Check.bsi: - * 0: Unknown/unclassified - * 1: BERR# - * 2: BINIT# - * 3: Hard Fail - * (FIXME: Are these SGI specific or generic bsi values?) - */ - if (pbci->eb) - switch (pbci->bsi) { - case 0: - /* e.g. a load from poisoned memory */ - return MCA_IS_LOCAL; - case 1: - case 2: - case 3: - return MCA_IS_GLOBAL; - } - - return MCA_IS_GLOBAL; -} - -/** - * get_target_identifier - Get the valid Cache or Bus check target identifier. - * @peidx: pointer of index of processor error section - * - * Return value: - * target address on Success / 0 on Failure - */ -static u64 -get_target_identifier(peidx_table_t *peidx) -{ - u64 target_address = 0; - sal_log_mod_error_info_t *smei; - pal_cache_check_info_t *pcci; - int i, level = 9; - - /* - * Look through the cache checks for a valid target identifier - * If more than one valid target identifier, return the one - * with the lowest cache level. 
- */ - for (i = 0; i < peidx_cache_check_num(peidx); i++) { - smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i); - if (smei->valid.target_identifier && smei->target_identifier) { - pcci = (pal_cache_check_info_t *)&(smei->check_info); - if (!target_address || (pcci->level < level)) { - target_address = smei->target_identifier; - level = pcci->level; - continue; - } - } - } - if (target_address) - return target_address; - - /* - * Look at the bus check for a valid target identifier - */ - smei = peidx_bus_check(peidx, 0); - if (smei && smei->valid.target_identifier) - return smei->target_identifier; - - return 0; -} - -/** - * recover_from_read_error - Try to recover the errors which type are "read"s. - * @slidx: pointer of index of SAL error record - * @peidx: pointer of index of processor error section - * @pbci: pointer of pal_bus_check_info - * @sos: pointer to hand off struct between SAL and OS - * - * Return value: - * 1 on Success / 0 on Failure - */ - -static int -recover_from_read_error(slidx_table_t *slidx, - peidx_table_t *peidx, pal_bus_check_info_t *pbci, - struct ia64_sal_os_state *sos) -{ - u64 target_identifier; - struct pal_min_state_area *pmsa; - struct ia64_psr *psr1, *psr2; - ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; - - /* Is target address valid? */ - target_identifier = get_target_identifier(peidx); - if (!target_identifier) - return fatal_mca("target address not valid"); - - /* - * cpu read or memory-mapped io read - * - * offending process affected process OS MCA do - * kernel mode kernel mode down system - * kernel mode user mode kill the process - * user mode kernel mode down system (*) - * user mode user mode kill the process - * - * (*) You could terminate offending user-mode process - * if (pbci->pv && pbci->pl != 0) *and* if you sure - * the process not have any locks of kernel. - */ - - /* Is minstate valid? 
*/ - if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) - return fatal_mca("minstate not valid"); - psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); - psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); - - /* - * Check the privilege level of interrupted context. - * If it is user-mode, then terminate affected process. - */ - - pmsa = sos->pal_min_state; - if (psr1->cpl != 0 || - ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) { - /* - * setup for resume to bottom half of MCA, - * "mca_handler_bhhook" - */ - /* pass to bhhook as argument (gr8, ...) */ - pmsa->pmsa_gr[8-1] = target_identifier; - pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip; - pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr; - /* set interrupted return address (but no use) */ - pmsa->pmsa_br0 = pmsa->pmsa_iip; - /* change resume address to bottom half */ - pmsa->pmsa_iip = mca_hdlr_bh->fp; - pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; - /* set cpl with kernel mode */ - psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; - psr2->cpl = 0; - psr2->ri = 0; - psr2->bn = 1; - psr2->i = 0; - - return mca_recovered("user memory corruption. " - "kill affected process - recovered."); - } - - return fatal_mca("kernel context not recovered, iip 0x%lx\n", - pmsa->pmsa_iip); -} - -/** - * recover_from_platform_error - Recover from platform error. 
- * @slidx: pointer of index of SAL error record - * @peidx: pointer of index of processor error section - * @pbci: pointer of pal_bus_check_info - * @sos: pointer to hand off struct between SAL and OS - * - * Return value: - * 1 on Success / 0 on Failure - */ - -static int -recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, - pal_bus_check_info_t *pbci, - struct ia64_sal_os_state *sos) -{ - int status = 0; - pal_processor_state_info_t *psp = - (pal_processor_state_info_t*)peidx_psp(peidx); - - if (psp->bc && pbci->eb && pbci->bsi == 0) { - switch(pbci->type) { - case 1: /* partial read */ - case 3: /* full line(cpu) read */ - case 9: /* I/O space read */ - status = recover_from_read_error(slidx, peidx, pbci, - sos); - break; - case 0: /* unknown */ - case 2: /* partial write */ - case 4: /* full line write */ - case 5: /* implicit or explicit write-back operation */ - case 6: /* snoop probe */ - case 7: /* incoming or outgoing ptc.g */ - case 8: /* write coalescing transactions */ - case 10: /* I/O space write */ - case 11: /* inter-processor interrupt message(IPI) */ - case 12: /* interrupt acknowledge or - external task priority cycle */ - default: - break; - } - } else if (psp->cc && !psp->bc) { /* Cache error */ - status = recover_from_read_error(slidx, peidx, pbci, sos); - } - - return status; -} - -/* - * recover_from_tlb_check - * @peidx: pointer of index of processor error section - * - * Return value: - * 1 on Success / 0 on Failure - */ -static int -recover_from_tlb_check(peidx_table_t *peidx) -{ - sal_log_mod_error_info_t *smei; - pal_tlb_check_info_t *ptci; - - smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0); - ptci = (pal_tlb_check_info_t *)&(smei->check_info); - - /* - * Look for signature of a duplicate TLB DTC entry, which is - * a SW bug and always fatal. 
- */ - if (ptci->op == PAL_TLB_CHECK_OP_PURGE - && !(ptci->itr || ptci->dtc || ptci->itc)) - return fatal_mca("Duplicate TLB entry"); - - return mca_recovered("TLB check recovered"); -} - -/** - * recover_from_processor_error - * @platform: whether there are some platform error section or not - * @slidx: pointer of index of SAL error record - * @peidx: pointer of index of processor error section - * @pbci: pointer of pal_bus_check_info - * @sos: pointer to hand off struct between SAL and OS - * - * Return value: - * 1 on Success / 0 on Failure - */ - -static int -recover_from_processor_error(int platform, slidx_table_t *slidx, - peidx_table_t *peidx, pal_bus_check_info_t *pbci, - struct ia64_sal_os_state *sos) -{ - pal_processor_state_info_t *psp = - (pal_processor_state_info_t*)peidx_psp(peidx); - - /* - * Processor recovery status must key off of the PAL recovery - * status in the Processor State Parameter. - */ - - /* - * The machine check is corrected. - */ - if (psp->cm == 1) - return mca_recovered("machine check is already corrected."); - - /* - * The error was not contained. Software must be reset. - */ - if (psp->us || psp->ci == 0) - return fatal_mca("error not contained"); - - /* - * Look for recoverable TLB check - */ - if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) - return recover_from_tlb_check(peidx); - - /* - * The cache check and bus check bits have four possible states - * cc bc - * 1 1 Memory error, attempt recovery - * 1 0 Cache error, attempt recovery - * 0 1 I/O error, attempt recovery - * 0 0 Other error type, not recovered - */ - if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL)) - return fatal_mca("No cache or bus check"); - - /* - * Cannot handle more than one bus check. 
- */ - if (peidx_bus_check_num(peidx) > 1) - return fatal_mca("Too many bus checks"); - - if (pbci->ib) - return fatal_mca("Internal Bus error"); - if (pbci->eb && pbci->bsi > 0) - return fatal_mca("External bus check fatal status"); - - /* - * This is a local MCA and estimated as a recoverable error. - */ - if (platform) - return recover_from_platform_error(slidx, peidx, pbci, sos); - - /* - * On account of strange SAL error record, we cannot recover. - */ - return fatal_mca("Strange SAL record"); -} - -/** - * mca_try_to_recover - Try to recover from MCA - * @rec: pointer to a SAL error record - * @sos: pointer to hand off struct between SAL and OS - * - * Return value: - * 1 on Success / 0 on Failure - */ - -static int -mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) -{ - int platform_err; - int n_proc_err; - slidx_table_t slidx; - peidx_table_t peidx; - pal_bus_check_info_t pbci; - - /* Make index of SAL error record */ - platform_err = mca_make_slidx(rec, &slidx); - - /* Count processor error sections */ - n_proc_err = slidx_count(&slidx, proc_err); - - /* Now, OS can recover when there is one processor error section */ - if (n_proc_err > 1) - return fatal_mca("Too Many Errors"); - else if (n_proc_err == 0) - /* Weird SAL record ... 
We can't do anything */ - return fatal_mca("Weird SAL record"); - - /* Make index of processor error section */ - mca_make_peidx((sal_log_processor_info_t*) - slidx_first_entry(&slidx.proc_err)->hdr, &peidx); - - /* Extract Processor BUS_CHECK[0] */ - *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0); - - /* Check whether MCA is global or not */ - if (is_mca_global(&peidx, &pbci, sos)) - return fatal_mca("global MCA"); - - /* Try to recover a processor error */ - return recover_from_processor_error(platform_err, &slidx, &peidx, - &pbci, sos); -} - -/* - * ============================================================================= - */ - -int __init mca_external_handler_init(void) -{ - if (init_record_index_pools()) - return -ENOMEM; - - /* register external mca handlers */ - if (ia64_reg_MCA_extension(mca_try_to_recover)) { - printk(KERN_ERR "ia64_reg_MCA_extension failed.\n"); - kfree(slidx_pool.buffer); - return -EFAULT; - } - return 0; -} - -void __exit mca_external_handler_exit(void) -{ - /* unregister external mca handlers */ - ia64_unreg_MCA_extension(); - kfree(slidx_pool.buffer); -} - -module_init(mca_external_handler_init); -module_exit(mca_external_handler_exit); - -module_param(sal_rec_max, int, 0644); -MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record"); - -MODULE_DESCRIPTION("ia64 platform dependent mca handler driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h deleted file mode 100644 index 45bc4e3ae14f..000000000000 --- a/arch/ia64/kernel/mca_drv.h +++ /dev/null @@ -1,123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * File: mca_drv.h - * Purpose: Define helpers for Generic MCA handling - * - * Copyright (C) 2004 FUJITSU LIMITED - * Copyright (C) 2004 Hidetoshi Seto - */ -/* - * Processor error section: - * - * +-sal_log_processor_info_t *info-------------+ - * | sal_log_section_hdr_t header; | - * | ... 
| - * | sal_log_mod_error_info_t info[0]; | - * +-+----------------+-------------------------+ - * | CACHE_CHECK | ^ num_cache_check v - * +----------------+ - * | TLB_CHECK | ^ num_tlb_check v - * +----------------+ - * | BUS_CHECK | ^ num_bus_check v - * +----------------+ - * | REG_FILE_CHECK | ^ num_reg_file_check v - * +----------------+ - * | MS_CHECK | ^ num_ms_check v - * +-struct cpuid_info *id----------------------+ - * | regs[5]; | - * | reserved; | - * +-sal_processor_static_info_t *regs----------+ - * | valid; | - * | ... | - * | fr[128]; | - * +--------------------------------------------+ - */ - -/* peidx: index of processor error section */ -typedef struct peidx_table { - sal_log_processor_info_t *info; - struct sal_cpuid_info *id; - sal_processor_static_info_t *regs; -} peidx_table_t; - -#define peidx_head(p) (((p)->info)) -#define peidx_mid(p) (((p)->id)) -#define peidx_bottom(p) (((p)->regs)) - -#define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter)) -#define peidx_field_valid(p) (&(peidx_head(p)->valid)) -#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area)) - -#define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check) -#define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check) -#define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check) -#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check) -#define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check) - -#define peidx_cache_check_idx(p, n) (n) -#define peidx_tlb_check_idx(p, n) (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n) -#define peidx_bus_check_idx(p, n) (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n) -#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n) -#define peidx_ms_check_idx(p, n) (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n) - -#define peidx_mod_error_info(p, name, n) \ -({ int __idx = peidx_##name##_idx(p, n); \ - 
sal_log_mod_error_info_t *__ret = NULL; \ - if (peidx_##name##_num(p) > n) /*BUG*/ \ - __ret = &(peidx_head(p)->info[__idx]); \ - __ret; }) - -#define peidx_cache_check(p, n) peidx_mod_error_info(p, cache_check, n) -#define peidx_tlb_check(p, n) peidx_mod_error_info(p, tlb_check, n) -#define peidx_bus_check(p, n) peidx_mod_error_info(p, bus_check, n) -#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n) -#define peidx_ms_check(p, n) peidx_mod_error_info(p, ms_check, n) - -#define peidx_check_info(proc, name, n) \ -({ \ - sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\ - u64 __temp = __info && __info->valid.check_info \ - ? __info->check_info : 0; \ - __temp; }) - -/* slidx: index of SAL log error record */ - -typedef struct slidx_list { - struct list_head list; - sal_log_section_hdr_t *hdr; -} slidx_list_t; - -typedef struct slidx_table { - sal_log_record_header_t *header; - int n_sections; /* # of section headers */ - struct list_head proc_err; - struct list_head mem_dev_err; - struct list_head sel_dev_err; - struct list_head pci_bus_err; - struct list_head smbios_dev_err; - struct list_head pci_comp_err; - struct list_head plat_specific_err; - struct list_head host_ctlr_err; - struct list_head plat_bus_err; - struct list_head unsupported; /* list of unsupported sections */ -} slidx_table_t; - -#define slidx_foreach_entry(pos, head) \ - list_for_each_entry(pos, head, list) -#define slidx_first_entry(head) \ - (((head)->next != (head)) ? 
list_entry((head)->next, typeof(slidx_list_t), list) : NULL) -#define slidx_count(slidx, sec) \ -({ int __count = 0; \ - slidx_list_t *__pos; \ - slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\ - __count; }) - -struct mca_table_entry { - int start_addr; /* location-relative starting address of MCA recoverable range */ - int end_addr; /* location-relative ending address of MCA recoverable range */ -}; - -extern const struct mca_table_entry *search_mca_tables (unsigned long addr); -extern int mca_recover_range(unsigned long); -extern void ia64_mlogbuf_dump(void); - diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S deleted file mode 100644 index 4428f57bee73..000000000000 --- a/arch/ia64/kernel/mca_drv_asm.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * File: mca_drv_asm.S - * Purpose: Assembly portion of Generic MCA handling - * - * Copyright (C) 2004 FUJITSU LIMITED - * Copyright (C) 2004 Hidetoshi Seto - */ -#include - -#include -#include -#include - -GLOBAL_ENTRY(mca_handler_bhhook) - invala // clear RSE ? - cover - ;; - clrrrb - ;; - alloc r16=ar.pfs,0,2,3,0 // make a new frame - mov ar.rsc=0 - mov r13=IA64_KR(CURRENT) // current task pointer - ;; - mov r2=r13 - ;; - addl r22=IA64_RBS_OFFSET,r2 - ;; - mov ar.bspstore=r22 - addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 - ;; - adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; - st1 [r2]=r0 // clear current->thread.on_ustack flag - mov loc0=r16 - movl loc1=mca_handler_bh // recovery C function - ;; - mov out0=r8 // poisoned address - mov out1=r9 // iip - mov out2=r10 // psr - mov b6=loc1 - ;; - mov loc1=rp - ssm psr.ic - ;; - srlz.i - ;; - ssm psr.i - br.call.sptk.many rp=b6 // does not return ... 
- ;; - mov ar.pfs=loc0 - mov rp=loc1 - ;; - mov r8=r0 - br.ret.sptk.many rp -END(mca_handler_bhhook) diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h deleted file mode 100644 index d6eab2a1084d..000000000000 --- a/arch/ia64/kernel/minstate.h +++ /dev/null @@ -1,251 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include - -#include "entry.h" -#include - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -/* read ar.itc in advance, and use it before leaving bank 0 */ -#define ACCOUNT_GET_STAMP \ -(pUStk) mov.m r20=ar.itc; -#define ACCOUNT_SYS_ENTER \ -(pUStk) br.call.spnt rp=account_sys_enter \ - ;; -#else -#define ACCOUNT_GET_STAMP -#define ACCOUNT_SYS_ENTER -#endif - -.section ".data..patch.rse", "a" -.previous - -/* - * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves - * the minimum state necessary that allows us to turn psr.ic back - * on. - * - * Assumed state upon entry: - * psr.ic: off - * r31: contains saved predicates (pr) - * - * Upon exit, the state is as follows: - * psr.ic: off - * r2 = points to &pt_regs.r16 - * r8 = contents of ar.ccv - * r9 = contents of ar.csd - * r10 = contents of ar.ssd - * r11 = FPSR_DEFAULT - * r12 = kernel sp (kernel virtual address) - * r13 = points to current task_struct (kernel virtual address) - * p15 = TRUE if psr.i is set in cr.ipsr - * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: - * preserved - * - * Note that psr.ic is NOT turned on by this macro. This is so that - * we can pass interruption state as arguments to a handler. 
- */ -#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND) \ - mov r16=IA64_KR(CURRENT); /* M */ \ - mov r27=ar.rsc; /* M */ \ - mov r20=r1; /* A */ \ - mov r25=ar.unat; /* M */ \ - MOV_FROM_IPSR(p0,r29); /* M */ \ - mov r26=ar.pfs; /* I */ \ - MOV_FROM_IIP(r28); /* M */ \ - mov r21=ar.fpsr; /* M */ \ - __COVER; /* B;; (or nothing) */ \ - ;; \ - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ - ;; \ - ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \ - st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \ - adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \ - /* switch from user to kernel RBS: */ \ - ;; \ - invala; /* M */ \ - SAVE_IFS; \ - cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \ - ;; \ -(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ - ;; \ -(pUStk) mov.m r24=ar.rnat; \ -(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ -(pKStk) mov r1=sp; /* get sp */ \ - ;; \ -(pUStk) lfetch.fault.excl.nt1 [r22]; \ -(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ -(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ - ;; \ -(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ -(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ - ;; \ -(pUStk) mov r18=ar.bsp; \ -(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ - adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ - adds r16=PT(CR_IPSR),r1; \ - ;; \ - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ - st8 [r16]=r29; /* save cr.ipsr */ \ - ;; \ - lfetch.fault.excl.nt1 [r17]; \ - tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ - mov r29=b0 \ - ;; \ - WORKAROUND; \ - adds r16=PT(R8),r1; /* initialize first base pointer */ \ - adds r17=PT(R9),r1; /* initialize second base pointer */ \ -(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r8,16; \ -.mem.offset 8,0; st8.spill 
[r17]=r9,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r10,24; \ -.mem.offset 8,0; st8.spill [r17]=r11,24; \ - ;; \ - st8 [r16]=r28,16; /* save cr.iip */ \ - st8 [r17]=r30,16; /* save cr.ifs */ \ -(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ - mov r8=ar.ccv; \ - mov r9=ar.csd; \ - mov r10=ar.ssd; \ - movl r11=FPSR_DEFAULT; /* L-unit */ \ - ;; \ - st8 [r16]=r25,16; /* save ar.unat */ \ - st8 [r17]=r26,16; /* save ar.pfs */ \ - shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ - ;; \ - st8 [r16]=r27,16; /* save ar.rsc */ \ -(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \ -(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \ - ;; /* avoid RAW on r16 & r17 */ \ -(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \ - st8 [r17]=r31,16; /* save predicates */ \ -(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \ - ;; \ - st8 [r16]=r29,16; /* save b0 */ \ - st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ - cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ -.mem.offset 8,0; st8.spill [r17]=r12,16; \ - adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r13,16; \ -.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \ - mov r13=IA64_KR(CURRENT); /* establish `current' */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r15,16; \ -.mem.offset 8,0; st8.spill [r17]=r14,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r2,16; \ -.mem.offset 8,0; st8.spill [r17]=r3,16; \ - ACCOUNT_GET_STAMP \ - adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ - ;; \ - EXTRA; \ - movl r1=__gp; /* establish kernel global pointer */ \ - ;; \ - ACCOUNT_SYS_ENTER \ - bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ - ;; - -/* - * SAVE_REST saves the remainder of pt_regs (with psr.ic on). 
- * - * Assumed state upon entry: - * psr.ic: on - * r2: points to &pt_regs.r16 - * r3: points to &pt_regs.r17 - * r8: contents of ar.ccv - * r9: contents of ar.csd - * r10: contents of ar.ssd - * r11: FPSR_DEFAULT - * - * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. - */ -#define SAVE_REST \ -.mem.offset 0,0; st8.spill [r2]=r16,16; \ -.mem.offset 8,0; st8.spill [r3]=r17,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r18,16; \ -.mem.offset 8,0; st8.spill [r3]=r19,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r20,16; \ -.mem.offset 8,0; st8.spill [r3]=r21,16; \ - mov r18=b6; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r22,16; \ -.mem.offset 8,0; st8.spill [r3]=r23,16; \ - mov r19=b7; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r24,16; \ -.mem.offset 8,0; st8.spill [r3]=r25,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r26,16; \ -.mem.offset 8,0; st8.spill [r3]=r27,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r28,16; \ -.mem.offset 8,0; st8.spill [r3]=r29,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r2]=r30,16; \ -.mem.offset 8,0; st8.spill [r3]=r31,32; \ - ;; \ - mov ar.fpsr=r11; /* M-unit */ \ - st8 [r2]=r8,8; /* ar.ccv */ \ - adds r24=PT(B6)-PT(F7),r3; \ - ;; \ - stf.spill [r2]=f6,32; \ - stf.spill [r3]=f7,32; \ - ;; \ - stf.spill [r2]=f8,32; \ - stf.spill [r3]=f9,32; \ - ;; \ - stf.spill [r2]=f10; \ - stf.spill [r3]=f11; \ - adds r25=PT(B7)-PT(F11),r3; \ - ;; \ - st8 [r24]=r18,16; /* b6 */ \ - st8 [r25]=r19,16; /* b7 */ \ - ;; \ - st8 [r24]=r9; /* ar.csd */ \ - st8 [r25]=r10; /* ar.ssd */ \ - ;; - -#define RSE_WORKAROUND \ -(pUStk) extr.u r17=r18,3,6; \ -(pUStk) sub r16=r18,r22; \ -[1:](pKStk) br.cond.sptk.many 1f; \ - .xdata4 ".data..patch.rse",1b-. 
\ - ;; \ - cmp.ge p6,p7 = 33,r17; \ - ;; \ -(p6) mov r17=0x310; \ -(p7) mov r17=0x308; \ - ;; \ - cmp.leu p1,p0=r16,r17; \ -(p1) br.cond.sptk.many 1f; \ - dep.z r17=r26,0,62; \ - movl r16=2f; \ - ;; \ - mov ar.pfs=r17; \ - dep r27=r0,r27,16,14; \ - mov b0=r16; \ - ;; \ - br.ret.sptk b0; \ - ;; \ -2: \ - mov ar.rsc=r0 \ - ;; \ - flushrs; \ - ;; \ - mov ar.bspstore=r22 \ - ;; \ - mov r18=ar.bsp; \ - ;; \ -1: \ - .pred.rel "mutex", pKStk, pUStk - -#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND) -#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND) -#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , ) diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c deleted file mode 100644 index 3661135da9d9..000000000000 --- a/arch/ia64/kernel/module.c +++ /dev/null @@ -1,959 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * IA-64-specific support for kernel module loader. - * - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * Loosely based on patch by Rusty Russell. - */ - -/* relocs tested so far: - - DIR64LSB - FPTR64LSB - GPREL22 - LDXMOV - LDXMOV - LTOFF22 - LTOFF22X - LTOFF22X - LTOFF_FPTR22 - PCREL21B (for br.call only; br.cond is not supported out of modules!) - PCREL60B (for brl.cond only; brl.call is not supported for modules!) - PCREL64LSB - SECREL32LSB - SEGREL64LSB - */ - - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define ARCH_MODULE_DEBUG 0 - -#if ARCH_MODULE_DEBUG -# define DEBUGP printk -# define inline -#else -# define DEBUGP(fmt , a...) -#endif - -#ifdef CONFIG_ITANIUM -# define USE_BRL 0 -#else -# define USE_BRL 1 -#endif - -#define MAX_LTOFF ((uint64_t) (1 << 22)) /* max. 
allowable linkage-table offset */ - -/* Define some relocation helper macros/types: */ - -#define FORMAT_SHIFT 0 -#define FORMAT_BITS 3 -#define FORMAT_MASK ((1 << FORMAT_BITS) - 1) -#define VALUE_SHIFT 3 -#define VALUE_BITS 5 -#define VALUE_MASK ((1 << VALUE_BITS) - 1) - -enum reloc_target_format { - /* direct encoded formats: */ - RF_NONE = 0, - RF_INSN14 = 1, - RF_INSN22 = 2, - RF_INSN64 = 3, - RF_32MSB = 4, - RF_32LSB = 5, - RF_64MSB = 6, - RF_64LSB = 7, - - /* formats that cannot be directly decoded: */ - RF_INSN60, - RF_INSN21B, /* imm21 form 1 */ - RF_INSN21M, /* imm21 form 2 */ - RF_INSN21F /* imm21 form 3 */ -}; - -enum reloc_value_formula { - RV_DIRECT = 4, /* S + A */ - RV_GPREL = 5, /* @gprel(S + A) */ - RV_LTREL = 6, /* @ltoff(S + A) */ - RV_PLTREL = 7, /* @pltoff(S + A) */ - RV_FPTR = 8, /* @fptr(S + A) */ - RV_PCREL = 9, /* S + A - P */ - RV_LTREL_FPTR = 10, /* @ltoff(@fptr(S + A)) */ - RV_SEGREL = 11, /* @segrel(S + A) */ - RV_SECREL = 12, /* @secrel(S + A) */ - RV_BDREL = 13, /* BD + A */ - RV_LTV = 14, /* S + A (like RV_DIRECT, except frozen at static link-time) */ - RV_PCREL2 = 15, /* S + A - P */ - RV_SPECIAL = 16, /* various (see below) */ - RV_RSVD17 = 17, - RV_TPREL = 18, /* @tprel(S + A) */ - RV_LTREL_TPREL = 19, /* @ltoff(@tprel(S + A)) */ - RV_DTPMOD = 20, /* @dtpmod(S + A) */ - RV_LTREL_DTPMOD = 21, /* @ltoff(@dtpmod(S + A)) */ - RV_DTPREL = 22, /* @dtprel(S + A) */ - RV_LTREL_DTPREL = 23, /* @ltoff(@dtprel(S + A)) */ - RV_RSVD24 = 24, - RV_RSVD25 = 25, - RV_RSVD26 = 26, - RV_RSVD27 = 27 - /* 28-31 reserved for implementation-specific purposes. 
*/ -}; - -#define N(reloc) [R_IA64_##reloc] = #reloc - -static const char *reloc_name[256] = { - N(NONE), N(IMM14), N(IMM22), N(IMM64), - N(DIR32MSB), N(DIR32LSB), N(DIR64MSB), N(DIR64LSB), - N(GPREL22), N(GPREL64I), N(GPREL32MSB), N(GPREL32LSB), - N(GPREL64MSB), N(GPREL64LSB), N(LTOFF22), N(LTOFF64I), - N(PLTOFF22), N(PLTOFF64I), N(PLTOFF64MSB), N(PLTOFF64LSB), - N(FPTR64I), N(FPTR32MSB), N(FPTR32LSB), N(FPTR64MSB), - N(FPTR64LSB), N(PCREL60B), N(PCREL21B), N(PCREL21M), - N(PCREL21F), N(PCREL32MSB), N(PCREL32LSB), N(PCREL64MSB), - N(PCREL64LSB), N(LTOFF_FPTR22), N(LTOFF_FPTR64I), N(LTOFF_FPTR32MSB), - N(LTOFF_FPTR32LSB), N(LTOFF_FPTR64MSB), N(LTOFF_FPTR64LSB), N(SEGREL32MSB), - N(SEGREL32LSB), N(SEGREL64MSB), N(SEGREL64LSB), N(SECREL32MSB), - N(SECREL32LSB), N(SECREL64MSB), N(SECREL64LSB), N(REL32MSB), - N(REL32LSB), N(REL64MSB), N(REL64LSB), N(LTV32MSB), - N(LTV32LSB), N(LTV64MSB), N(LTV64LSB), N(PCREL21BI), - N(PCREL22), N(PCREL64I), N(IPLTMSB), N(IPLTLSB), - N(COPY), N(LTOFF22X), N(LDXMOV), N(TPREL14), - N(TPREL22), N(TPREL64I), N(TPREL64MSB), N(TPREL64LSB), - N(LTOFF_TPREL22), N(DTPMOD64MSB), N(DTPMOD64LSB), N(LTOFF_DTPMOD22), - N(DTPREL14), N(DTPREL22), N(DTPREL64I), N(DTPREL32MSB), - N(DTPREL32LSB), N(DTPREL64MSB), N(DTPREL64LSB), N(LTOFF_DTPREL22) -}; - -#undef N - -/* Opaque struct for insns, to protect against derefs. 
*/ -struct insn; - -static inline uint64_t -bundle (const struct insn *insn) -{ - return (uint64_t) insn & ~0xfUL; -} - -static inline int -slot (const struct insn *insn) -{ - return (uint64_t) insn & 0x3; -} - -static int -apply_imm64 (struct module *mod, struct insn *insn, uint64_t val) -{ - if (slot(insn) != 1 && slot(insn) != 2) { - printk(KERN_ERR "%s: invalid slot number %d for IMM64\n", - mod->name, slot(insn)); - return 0; - } - ia64_patch_imm64((u64) insn, val); - return 1; -} - -static int -apply_imm60 (struct module *mod, struct insn *insn, uint64_t val) -{ - if (slot(insn) != 1 && slot(insn) != 2) { - printk(KERN_ERR "%s: invalid slot number %d for IMM60\n", - mod->name, slot(insn)); - return 0; - } - if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) { - printk(KERN_ERR "%s: value %ld out of IMM60 range\n", - mod->name, (long) val); - return 0; - } - ia64_patch_imm60((u64) insn, val); - return 1; -} - -static int -apply_imm22 (struct module *mod, struct insn *insn, uint64_t val) -{ - if (val + (1 << 21) >= (1 << 22)) { - printk(KERN_ERR "%s: value %li out of IMM22 range\n", - mod->name, (long)val); - return 0; - } - ia64_patch((u64) insn, 0x01fffcfe000UL, ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */ - | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */ - | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */ - | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */)); - return 1; -} - -static int -apply_imm21b (struct module *mod, struct insn *insn, uint64_t val) -{ - if (val + (1 << 20) >= (1 << 21)) { - printk(KERN_ERR "%s: value %li out of IMM21b range\n", - mod->name, (long)val); - return 0; - } - ia64_patch((u64) insn, 0x11ffffe000UL, ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */ - | ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */)); - return 1; -} - -#if USE_BRL - -struct plt_entry { - /* Three instruction bundles in PLT. 
*/ - unsigned char bundle[2][16]; -}; - -static const struct plt_entry ia64_plt_template = { - { - { - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */ - 0x00, 0x00, 0x00, 0x60 - }, - { - 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* brl.many gp=TARGET_GP */ - 0x08, 0x00, 0x00, 0xc0 - } - } -}; - -static int -patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp) -{ - if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp) - && apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2), - (target_ip - (int64_t) plt->bundle[1]) / 16)) - return 1; - return 0; -} - -unsigned long -plt_target (struct plt_entry *plt) -{ - uint64_t b0, b1, *b = (uint64_t *) plt->bundle[1]; - long off; - - b0 = b[0]; b1 = b[1]; - off = ( ((b1 & 0x00fffff000000000UL) >> 36) /* imm20b -> bit 0 */ - | ((b0 >> 48) << 20) | ((b1 & 0x7fffffUL) << 36) /* imm39 -> bit 20 */ - | ((b1 & 0x0800000000000000UL) << 0)); /* i -> bit 59 */ - return (long) plt->bundle[1] + 16*off; -} - -#else /* !USE_BRL */ - -struct plt_entry { - /* Three instruction bundles in PLT. 
*/ - unsigned char bundle[3][16]; -}; - -static const struct plt_entry ia64_plt_template = { - { - { - 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* movl r16=TARGET_IP */ - 0x02, 0x00, 0x00, 0x60 - }, - { - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */ - 0x00, 0x00, 0x00, 0x60 - }, - { - 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */ - 0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /* mov b6=r16 */ - 0x60, 0x00, 0x80, 0x00 /* br.few b6 */ - } - } -}; - -static int -patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp) -{ - if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip) - && apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp)) - return 1; - return 0; -} - -unsigned long -plt_target (struct plt_entry *plt) -{ - uint64_t b0, b1, *b = (uint64_t *) plt->bundle[0]; - - b0 = b[0]; b1 = b[1]; - return ( ((b1 & 0x000007f000000000) >> 36) /* imm7b -> bit 0 */ - | ((b1 & 0x07fc000000000000) >> 43) /* imm9d -> bit 7 */ - | ((b1 & 0x0003e00000000000) >> 29) /* imm5c -> bit 16 */ - | ((b1 & 0x0000100000000000) >> 23) /* ic -> bit 21 */ - | ((b0 >> 46) << 22) | ((b1 & 0x7fffff) << 40) /* imm41 -> bit 22 */ - | ((b1 & 0x0800000000000000) << 4)); /* i -> bit 63 */ -} - -#endif /* !USE_BRL */ - -void -module_arch_freeing_init (struct module *mod) -{ - if (mod->arch.init_unw_table) { - unw_remove_unwind_table(mod->arch.init_unw_table); - mod->arch.init_unw_table = NULL; - } -} - -/* Have we already seen one of these relocations? 
*/ -/* FIXME: we could look in other sections, too --RR */ -static int -duplicate_reloc (const Elf64_Rela *rela, unsigned int num) -{ - unsigned int i; - - for (i = 0; i < num; i++) { - if (rela[i].r_info == rela[num].r_info && rela[i].r_addend == rela[num].r_addend) - return 1; - } - return 0; -} - -/* Count how many GOT entries we may need */ -static unsigned int -count_gots (const Elf64_Rela *rela, unsigned int num) -{ - unsigned int i, ret = 0; - - /* Sure, this is order(n^2), but it's usually short, and not - time critical */ - for (i = 0; i < num; i++) { - switch (ELF64_R_TYPE(rela[i].r_info)) { - case R_IA64_LTOFF22: - case R_IA64_LTOFF22X: - case R_IA64_LTOFF64I: - case R_IA64_LTOFF_FPTR22: - case R_IA64_LTOFF_FPTR64I: - case R_IA64_LTOFF_FPTR32MSB: - case R_IA64_LTOFF_FPTR32LSB: - case R_IA64_LTOFF_FPTR64MSB: - case R_IA64_LTOFF_FPTR64LSB: - if (!duplicate_reloc(rela, i)) - ret++; - break; - } - } - return ret; -} - -/* Count how many PLT entries we may need */ -static unsigned int -count_plts (const Elf64_Rela *rela, unsigned int num) -{ - unsigned int i, ret = 0; - - /* Sure, this is order(n^2), but it's usually short, and not - time critical */ - for (i = 0; i < num; i++) { - switch (ELF64_R_TYPE(rela[i].r_info)) { - case R_IA64_PCREL21B: - case R_IA64_PLTOFF22: - case R_IA64_PLTOFF64I: - case R_IA64_PLTOFF64MSB: - case R_IA64_PLTOFF64LSB: - case R_IA64_IPLTMSB: - case R_IA64_IPLTLSB: - if (!duplicate_reloc(rela, i)) - ret++; - break; - } - } - return ret; -} - -/* We need to create an function-descriptors for any internal function - which is referenced. */ -static unsigned int -count_fdescs (const Elf64_Rela *rela, unsigned int num) -{ - unsigned int i, ret = 0; - - /* Sure, this is order(n^2), but it's usually short, and not time critical. 
*/ - for (i = 0; i < num; i++) { - switch (ELF64_R_TYPE(rela[i].r_info)) { - case R_IA64_FPTR64I: - case R_IA64_FPTR32LSB: - case R_IA64_FPTR32MSB: - case R_IA64_FPTR64LSB: - case R_IA64_FPTR64MSB: - case R_IA64_LTOFF_FPTR22: - case R_IA64_LTOFF_FPTR32LSB: - case R_IA64_LTOFF_FPTR32MSB: - case R_IA64_LTOFF_FPTR64I: - case R_IA64_LTOFF_FPTR64LSB: - case R_IA64_LTOFF_FPTR64MSB: - case R_IA64_IPLTMSB: - case R_IA64_IPLTLSB: - /* - * Jumps to static functions sometimes go straight to their - * offset. Of course, that may not be possible if the jump is - * from init -> core or vice. versa, so we need to generate an - * FDESC (and PLT etc) for that. - */ - case R_IA64_PCREL21B: - if (!duplicate_reloc(rela, i)) - ret++; - break; - } - } - return ret; -} - -int -module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, - struct module *mod) -{ - unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0; - Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; - - /* - * To store the PLTs and function-descriptors, we expand the .text section for - * core module-code and the .init.text section for initialization code. - */ - for (s = sechdrs; s < sechdrs_end; ++s) - if (strcmp(".core.plt", secstrings + s->sh_name) == 0) - mod->arch.core_plt = s; - else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) - mod->arch.init_plt = s; - else if (strcmp(".got", secstrings + s->sh_name) == 0) - mod->arch.got = s; - else if (strcmp(".opd", secstrings + s->sh_name) == 0) - mod->arch.opd = s; - else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0) - mod->arch.unwind = s; - - if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) { - printk(KERN_ERR "%s: sections missing\n", mod->name); - return -ENOEXEC; - } - - /* GOT and PLTs can occur in any relocated section... 
*/ - for (s = sechdrs + 1; s < sechdrs_end; ++s) { - const Elf64_Rela *rels = (void *)ehdr + s->sh_offset; - unsigned long numrels = s->sh_size/sizeof(Elf64_Rela); - - if (s->sh_type != SHT_RELA) - continue; - - gots += count_gots(rels, numrels); - fdescs += count_fdescs(rels, numrels); - if (strstr(secstrings + s->sh_name, ".init")) - init_plts += count_plts(rels, numrels); - else - core_plts += count_plts(rels, numrels); - } - - mod->arch.core_plt->sh_type = SHT_NOBITS; - mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.core_plt->sh_addralign = 16; - mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry); - mod->arch.init_plt->sh_type = SHT_NOBITS; - mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.init_plt->sh_addralign = 16; - mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry); - mod->arch.got->sh_type = SHT_NOBITS; - mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC; - mod->arch.got->sh_addralign = 8; - mod->arch.got->sh_size = gots * sizeof(struct got_entry); - mod->arch.opd->sh_type = SHT_NOBITS; - mod->arch.opd->sh_flags = SHF_ALLOC; - mod->arch.opd->sh_addralign = 8; - mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc); - DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n", - __func__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size, - mod->arch.got->sh_size, mod->arch.opd->sh_size); - return 0; -} - -static inline bool -in_init (const struct module *mod, uint64_t addr) -{ - return within_module_init(addr, mod); -} - -static inline bool -in_core (const struct module *mod, uint64_t addr) -{ - return within_module_core(addr, mod); -} - -static inline bool -is_internal (const struct module *mod, uint64_t value) -{ - return in_init(mod, value) || in_core(mod, value); -} - -/* - * Get gp-relative offset for the linkage-table entry of VALUE. 
- */ -static uint64_t -get_ltoff (struct module *mod, uint64_t value, int *okp) -{ - struct got_entry *got, *e; - - if (!*okp) - return 0; - - got = (void *) mod->arch.got->sh_addr; - for (e = got; e < got + mod->arch.next_got_entry; ++e) - if (e->val == value) - goto found; - - /* Not enough GOT entries? */ - BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size)); - - e->val = value; - ++mod->arch.next_got_entry; - found: - return (uint64_t) e - mod->arch.gp; -} - -static inline int -gp_addressable (struct module *mod, uint64_t value) -{ - return value - mod->arch.gp + MAX_LTOFF/2 < MAX_LTOFF; -} - -/* Get PC-relative PLT entry for this value. Returns 0 on failure. */ -static uint64_t -get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp) -{ - struct plt_entry *plt, *plt_end; - uint64_t target_ip, target_gp; - - if (!*okp) - return 0; - - if (in_init(mod, (uint64_t) insn)) { - plt = (void *) mod->arch.init_plt->sh_addr; - plt_end = (void *) plt + mod->arch.init_plt->sh_size; - } else { - plt = (void *) mod->arch.core_plt->sh_addr; - plt_end = (void *) plt + mod->arch.core_plt->sh_size; - } - - /* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */ - target_ip = ((uint64_t *) value)[0]; - target_gp = ((uint64_t *) value)[1]; - - /* Look for existing PLT entry. */ - while (plt->bundle[0][0]) { - if (plt_target(plt) == target_ip) - goto found; - if (++plt >= plt_end) - BUG(); - } - *plt = ia64_plt_template; - if (!patch_plt(mod, plt, target_ip, target_gp)) { - *okp = 0; - return 0; - } -#if ARCH_MODULE_DEBUG - if (plt_target(plt) != target_ip) { - printk("%s: mistargeted PLT: wanted %lx, got %lx\n", - __func__, target_ip, plt_target(plt)); - *okp = 0; - return 0; - } -#endif - found: - return (uint64_t) plt; -} - -/* Get function descriptor for VALUE. 
*/ -static uint64_t -get_fdesc (struct module *mod, uint64_t value, int *okp) -{ - struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr; - - if (!*okp) - return 0; - - if (!value) { - printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name); - return 0; - } - - if (!is_internal(mod, value)) - /* - * If it's not a module-local entry-point, "value" already points to a - * function-descriptor. - */ - return value; - - /* Look for existing function descriptor. */ - while (fdesc->addr) { - if (fdesc->addr == value) - return (uint64_t)fdesc; - if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size) - BUG(); - } - - /* Create new one */ - fdesc->addr = value; - fdesc->gp = mod->arch.gp; - return (uint64_t) fdesc; -} - -static inline int -do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, - Elf64_Shdr *sec, void *location) -{ - enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK; - enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK; - uint64_t val; - int ok = 1; - - val = sym->st_value + addend; - - switch (formula) { - case RV_SEGREL: /* segment base is arbitrarily chosen to be 0 for kernel modules */ - case RV_DIRECT: - break; - - case RV_GPREL: val -= mod->arch.gp; break; - case RV_LTREL: val = get_ltoff(mod, val, &ok); break; - case RV_PLTREL: val = get_plt(mod, location, val, &ok); break; - case RV_FPTR: val = get_fdesc(mod, val, &ok); break; - case RV_SECREL: val -= sec->sh_addr; break; - case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break; - - case RV_PCREL: - switch (r_type) { - case R_IA64_PCREL21B: - if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) || - (in_core(mod, val) && in_init(mod, (uint64_t)location))) { - /* - * Init section may have been allocated far away from core, - * if the branch won't reach, then allocate a plt for it. 
- */ - uint64_t delta = ((int64_t)val - (int64_t)location) / 16; - if (delta + (1 << 20) >= (1 << 21)) { - val = get_fdesc(mod, val, &ok); - val = get_plt(mod, location, val, &ok); - } - } else if (!is_internal(mod, val)) - val = get_plt(mod, location, val, &ok); - fallthrough; - default: - val -= bundle(location); - break; - - case R_IA64_PCREL32MSB: - case R_IA64_PCREL32LSB: - case R_IA64_PCREL64MSB: - case R_IA64_PCREL64LSB: - val -= (uint64_t) location; - break; - - } - switch (r_type) { - case R_IA64_PCREL60B: format = RF_INSN60; break; - case R_IA64_PCREL21B: format = RF_INSN21B; break; - case R_IA64_PCREL21M: format = RF_INSN21M; break; - case R_IA64_PCREL21F: format = RF_INSN21F; break; - default: break; - } - break; - - case RV_BDREL: - val -= (uint64_t) (in_init(mod, val) ? mod->mem[MOD_INIT_TEXT].base : - mod->mem[MOD_TEXT].base); - break; - - case RV_LTV: - /* can link-time value relocs happen here? */ - BUG(); - break; - - case RV_PCREL2: - if (r_type == R_IA64_PCREL21BI) { - if (!is_internal(mod, val)) { - printk(KERN_ERR "%s: %s reloc against " - "non-local symbol (%lx)\n", __func__, - reloc_name[r_type], (unsigned long)val); - return -ENOEXEC; - } - format = RF_INSN21B; - } - val -= bundle(location); - break; - - case RV_SPECIAL: - switch (r_type) { - case R_IA64_IPLTMSB: - case R_IA64_IPLTLSB: - val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok); - format = RF_64LSB; - if (r_type == R_IA64_IPLTMSB) - format = RF_64MSB; - break; - - case R_IA64_SUB: - val = addend - sym->st_value; - format = RF_INSN64; - break; - - case R_IA64_LTOFF22X: - if (gp_addressable(mod, val)) - val -= mod->arch.gp; - else - val = get_ltoff(mod, val, &ok); - format = RF_INSN22; - break; - - case R_IA64_LDXMOV: - if (gp_addressable(mod, val)) { - /* turn "ld8" into "mov": */ - DEBUGP("%s: patching ld8 at %p to mov\n", __func__, location); - ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL); - } - return 0; - - default: - if (reloc_name[r_type]) - 
printk(KERN_ERR "%s: special reloc %s not supported", - mod->name, reloc_name[r_type]); - else - printk(KERN_ERR "%s: unknown special reloc %x\n", - mod->name, r_type); - return -ENOEXEC; - } - break; - - case RV_TPREL: - case RV_LTREL_TPREL: - case RV_DTPMOD: - case RV_LTREL_DTPMOD: - case RV_DTPREL: - case RV_LTREL_DTPREL: - printk(KERN_ERR "%s: %s reloc not supported\n", - mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?"); - return -ENOEXEC; - - default: - printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type); - return -ENOEXEC; - } - - if (!ok) - return -ENOEXEC; - - DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __func__, location, val, - reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend); - - switch (format) { - case RF_INSN21B: ok = apply_imm21b(mod, location, (int64_t) val / 16); break; - case RF_INSN22: ok = apply_imm22(mod, location, val); break; - case RF_INSN64: ok = apply_imm64(mod, location, val); break; - case RF_INSN60: ok = apply_imm60(mod, location, (int64_t) val / 16); break; - case RF_32LSB: put_unaligned(val, (uint32_t *) location); break; - case RF_64LSB: put_unaligned(val, (uint64_t *) location); break; - case RF_32MSB: /* ia64 Linux is little-endian... */ - case RF_64MSB: /* ia64 Linux is little-endian... */ - case RF_INSN14: /* must be within-module, i.e., resolved by "ld -r" */ - case RF_INSN21M: /* must be within-module, i.e., resolved by "ld -r" */ - case RF_INSN21F: /* must be within-module, i.e., resolved by "ld -r" */ - printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n", - mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?"); - return -ENOEXEC; - - default: - printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n", - mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format); - return -ENOEXEC; - } - return ok ? 
0 : -ENOEXEC; -} - -int -apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, - unsigned int relsec, struct module *mod) -{ - unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela); - Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr; - Elf64_Shdr *target_sec; - int ret; - - DEBUGP("%s: applying section %u (%u relocs) to %u\n", __func__, - relsec, n, sechdrs[relsec].sh_info); - - target_sec = sechdrs + sechdrs[relsec].sh_info; - - if (target_sec->sh_entsize == ~0UL) - /* - * If target section wasn't allocated, we don't need to relocate it. - * Happens, e.g., for debug sections. - */ - return 0; - - if (!mod->arch.gp) { - /* - * XXX Should have an arch-hook for running this after final section - * addresses have been selected... - */ - uint64_t gp; - struct module_memory *mod_mem; - - mod_mem = &mod->mem[MOD_DATA]; - if (mod_mem->size > MAX_LTOFF) - /* - * This takes advantage of fact that SHF_ARCH_SMALL gets allocated - * at the end of the module. - */ - gp = mod_mem->size - MAX_LTOFF / 2; - else - gp = mod_mem->size / 2; - gp = (uint64_t) mod_mem->base + ((gp + 7) & -8); - mod->arch.gp = gp; - DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); - } - - for (i = 0; i < n; i++) { - ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info), - ((Elf64_Sym *) sechdrs[symindex].sh_addr - + ELF64_R_SYM(rela[i].r_info)), - rela[i].r_addend, target_sec, - (void *) target_sec->sh_addr + rela[i].r_offset); - if (ret < 0) - return ret; - } - return 0; -} - -/* - * Modules contain a single unwind table which covers both the core and the init text - * sections but since the two are not contiguous, we need to split this table up such that - * we can register (and unregister) each "segment" separately. Fortunately, this sounds - * more complicated than it really is. 
- */ -static void -register_unwind_table (struct module *mod) -{ - struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr; - struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start); - struct unw_table_entry *e1, *e2, *core, *init; - unsigned long num_init = 0, num_core = 0; - - /* First, count how many init and core unwind-table entries there are. */ - for (e1 = start; e1 < end; ++e1) - if (in_init(mod, e1->start_offset)) - ++num_init; - else - ++num_core; - /* - * Second, sort the table such that all unwind-table entries for the init and core - * text sections are nicely separated. We do this with a stupid bubble sort - * (unwind tables don't get ridiculously huge). - */ - for (e1 = start; e1 < end; ++e1) { - for (e2 = e1 + 1; e2 < end; ++e2) { - if (e2->start_offset < e1->start_offset) { - swap(*e1, *e2); - } - } - } - /* - * Third, locate the init and core segments in the unwind table: - */ - if (in_init(mod, start->start_offset)) { - init = start; - core = start + num_init; - } else { - core = start; - init = start + num_core; - } - - DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __func__, - mod->name, mod->arch.gp, num_init, num_core); - - /* - * Fourth, register both tables (if not empty). 
- */ - if (num_core > 0) { - mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, - core, core + num_core); - DEBUGP("%s: core: handle=%p [%p-%p)\n", __func__, - mod->arch.core_unw_table, core, core + num_core); - } - if (num_init > 0) { - mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, - init, init + num_init); - DEBUGP("%s: init: handle=%p [%p-%p)\n", __func__, - mod->arch.init_unw_table, init, init + num_init); - } -} - -int -module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) -{ - struct mod_arch_specific *mas = &mod->arch; - - DEBUGP("%s: init: entry=%p\n", __func__, mod->init); - if (mas->unwind) - register_unwind_table(mod); - - /* - * ".opd" was already relocated to the final destination. Store - * it's address for use in symbolizer. - */ - mas->opd_addr = (void *)mas->opd->sh_addr; - mas->opd_size = mas->opd->sh_size; - - /* - * Module relocation was already done at this point. Section - * headers are about to be deleted. Wipe out load-time context. 
- */ - mas->core_plt = NULL; - mas->init_plt = NULL; - mas->got = NULL; - mas->opd = NULL; - mas->unwind = NULL; - mas->gp = 0; - mas->next_got_entry = 0; - - return 0; -} - -void -module_arch_cleanup (struct module *mod) -{ - if (mod->arch.init_unw_table) { - unw_remove_unwind_table(mod->arch.init_unw_table); - mod->arch.init_unw_table = NULL; - } - if (mod->arch.core_unw_table) { - unw_remove_unwind_table(mod->arch.core_unw_table); - mod->arch.core_unw_table = NULL; - } -} - -void *dereference_module_function_descriptor(struct module *mod, void *ptr) -{ - struct mod_arch_specific *mas = &mod->arch; - - if (ptr < mas->opd_addr || ptr >= mas->opd_addr + mas->opd_size) - return ptr; - - return dereference_function_descriptor(ptr); -} diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c deleted file mode 100644 index 025e5133c860..000000000000 --- a/arch/ia64/kernel/msi_ia64.c +++ /dev/null @@ -1,198 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * MSI hooks for standard x86 apic - */ - -#include -#include -#include -#include -#include -#include - -static struct irq_chip ia64_msi_chip; - -#ifdef CONFIG_SMP -static int ia64_set_msi_irq_affinity(struct irq_data *idata, - const cpumask_t *cpu_mask, bool force) -{ - struct msi_msg msg; - u32 addr, data; - int cpu = cpumask_first_and(cpu_mask, cpu_online_mask); - unsigned int irq = idata->irq; - - if (irq_prepare_move(irq, cpu)) - return -1; - - __get_cached_msi_msg(irq_data_get_msi_desc(idata), &msg); - - addr = msg.address_lo; - addr &= MSI_ADDR_DEST_ID_MASK; - addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); - msg.address_lo = addr; - - data = msg.data; - data &= MSI_DATA_VECTOR_MASK; - data |= MSI_DATA_VECTOR(irq_to_vector(irq)); - msg.data = data; - - pci_write_msi_msg(irq, &msg); - irq_data_update_affinity(idata, cpumask_of(cpu)); - - return 0; -} -#endif /* CONFIG_SMP */ - -int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) -{ - struct msi_msg msg; - unsigned long 
dest_phys_id; - int irq, vector; - - irq = create_irq(); - if (irq < 0) - return irq; - - irq_set_msi_desc(irq, desc); - dest_phys_id = cpu_physical_id(cpumask_any_and(&(irq_to_domain(irq)), - cpu_online_mask)); - vector = irq_to_vector(irq); - - msg.address_hi = 0; - msg.address_lo = - MSI_ADDR_HEADER | - MSI_ADDR_DEST_MODE_PHYS | - MSI_ADDR_REDIRECTION_CPU | - MSI_ADDR_DEST_ID_CPU(dest_phys_id); - - msg.data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - MSI_DATA_DELIVERY_FIXED | - MSI_DATA_VECTOR(vector); - - pci_write_msi_msg(irq, &msg); - irq_set_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq); - - return 0; -} - -void arch_teardown_msi_irq(unsigned int irq) -{ - destroy_irq(irq); -} - -static void ia64_ack_msi_irq(struct irq_data *data) -{ - irq_complete_move(data->irq); - irq_move_irq(data); - ia64_eoi(); -} - -static int ia64_msi_retrigger_irq(struct irq_data *data) -{ - unsigned int vector = irq_to_vector(data->irq); - ia64_resend_irq(vector); - - return 1; -} - -/* - * Generic ops used on most IA64 platforms. 
- */ -static struct irq_chip ia64_msi_chip = { - .name = "PCI-MSI", - .irq_mask = pci_msi_mask_irq, - .irq_unmask = pci_msi_unmask_irq, - .irq_ack = ia64_ack_msi_irq, -#ifdef CONFIG_SMP - .irq_set_affinity = ia64_set_msi_irq_affinity, -#endif - .irq_retrigger = ia64_msi_retrigger_irq, -}; - -#ifdef CONFIG_INTEL_IOMMU -#ifdef CONFIG_SMP -static int dmar_msi_set_affinity(struct irq_data *data, - const struct cpumask *mask, bool force) -{ - unsigned int irq = data->irq; - struct irq_cfg *cfg = irq_cfg + irq; - struct msi_msg msg; - int cpu = cpumask_first_and(mask, cpu_online_mask); - - if (irq_prepare_move(irq, cpu)) - return -1; - - dmar_msi_read(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); - - dmar_msi_write(irq, &msg); - irq_data_update_affinity(data, mask); - - return 0; -} -#endif /* CONFIG_SMP */ - -static struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .irq_unmask = dmar_msi_unmask, - .irq_mask = dmar_msi_mask, - .irq_ack = ia64_ack_msi_irq, -#ifdef CONFIG_SMP - .irq_set_affinity = dmar_msi_set_affinity, -#endif - .irq_retrigger = ia64_msi_retrigger_irq, -}; - -static void -msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) -{ - struct irq_cfg *cfg = irq_cfg + irq; - unsigned dest; - - dest = cpu_physical_id(cpumask_first_and(&(irq_to_domain(irq)), - cpu_online_mask)); - - msg->address_hi = 0; - msg->address_lo = - MSI_ADDR_HEADER | - MSI_ADDR_DEST_MODE_PHYS | - MSI_ADDR_REDIRECTION_CPU | - MSI_ADDR_DEST_ID_CPU(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - MSI_DATA_DELIVERY_FIXED | - MSI_DATA_VECTOR(cfg->vector); -} - -int dmar_alloc_hwirq(int id, int node, void *arg) -{ - int irq; - struct msi_msg msg; - - irq = create_irq(); - if (irq > 0) { - irq_set_handler_data(irq, arg); - irq_set_chip_and_handler_name(irq, &dmar_msi_type, - 
handle_edge_irq, "edge"); - msi_compose_msg(NULL, irq, &msg); - dmar_msi_write(irq, &msg); - } - - return irq; -} - -void dmar_free_hwirq(int irq) -{ - irq_set_handler_data(irq, NULL); - destroy_irq(irq); -} -#endif /* CONFIG_INTEL_IOMMU */ - diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c deleted file mode 100644 index 8a959f20662d..000000000000 --- a/arch/ia64/kernel/numa.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * - * ia64 kernel NUMA specific stuff - * - * Copyright (C) 2002 Erich Focht - * Copyright (C) 2004 Silicon Graphics, Inc. - * Jesse Barnes - */ -#include -#include -#include -#include - -u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_to_node_map); - -cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; -EXPORT_SYMBOL(node_to_cpu_mask); - -void map_cpu_to_node(int cpu, int nid) -{ - int oldnid; - if (nid < 0) { /* just initialize by zero */ - cpu_to_node_map[cpu] = 0; - return; - } - /* sanity check first */ - oldnid = cpu_to_node_map[cpu]; - if (cpumask_test_cpu(cpu, &node_to_cpu_mask[oldnid])) { - return; /* nothing to do */ - } - /* we don't have cpu-driven node hot add yet... - In usual case, node is created from SRAT at boot time. */ - if (!node_online(nid)) - nid = first_online_node; - cpu_to_node_map[cpu] = nid; - cpumask_set_cpu(cpu, &node_to_cpu_mask[nid]); - return; -} - -void unmap_cpu_from_node(int cpu, int nid) -{ - WARN_ON(!cpumask_test_cpu(cpu, &node_to_cpu_mask[nid])); - WARN_ON(cpu_to_node_map[cpu] != nid); - cpu_to_node_map[cpu] = 0; - cpumask_clear_cpu(cpu, &node_to_cpu_mask[nid]); -} - - -/** - * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays - * - * Build cpu to node mapping and initialize the per node cpu masks using - * info from the node_cpuid array handed to us by ACPI. 
- */ -void __init build_cpu_to_node_map(void) -{ - int cpu, i, node; - - for(node=0; node < MAX_NUMNODES; node++) - cpumask_clear(&node_to_cpu_mask[node]); - - for_each_possible_early_cpu(cpu) { - node = NUMA_NO_NODE; - for (i = 0; i < NR_CPUS; ++i) - if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { - node = node_cpuid[i].nid; - break; - } - map_cpu_to_node(cpu, node); - } -} diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S deleted file mode 100644 index fb6db6966f70..000000000000 --- a/arch/ia64/kernel/pal.S +++ /dev/null @@ -1,306 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * PAL Firmware support - * IA-64 Processor Programmers Reference Vol 2 - * - * Copyright (C) 1999 Don Dugger - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co - * David Mosberger - * Stephane Eranian - * - * 05/22/2000 eranian Added support for stacked register calls - * 05/24/2000 eranian Added support for physical mode static calls - */ - -#include -#include -#include - - .data -pal_entry_point: - data8 ia64_pal_default_handler - .text - -/* - * Set the PAL entry point address. This could be written in C code, but we - * do it here to keep it all in one module (besides, it's so trivial that it's - * not a big deal). - * - * in0 Address of the PAL entry point (text address, NOT a function - * descriptor). - */ -GLOBAL_ENTRY(ia64_pal_handler_init) - alloc r3=ar.pfs,1,0,0,0 - movl r2=pal_entry_point - ;; - st8 [r2]=in0 - br.ret.sptk.many rp -END(ia64_pal_handler_init) - -/* - * Default PAL call handler. This needs to be coded in assembly because it - * uses the static calling convention, i.e., the RSE may not be used and - * calls are done via "br.cond" (not "br.call"). - */ -GLOBAL_ENTRY(ia64_pal_default_handler) - mov r8=-1 - br.cond.sptk.many rp -END(ia64_pal_default_handler) - -/* - * Make a PAL call using the static calling convention. 
- * - * in0 Index of PAL service - * in1 - in3 Remaining PAL arguments - */ -GLOBAL_ENTRY(ia64_pal_call_static) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) - alloc loc1 = ar.pfs,4,5,0,0 - movl loc2 = pal_entry_point -1: { - mov r28 = in0 - mov r29 = in1 - mov r8 = ip - } - ;; - ld8 loc2 = [loc2] // loc2 <- entry point - adds r8 = 1f-1b,r8 - mov loc4=ar.rsc // save RSE configuration - ;; - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov loc3 = psr - mov loc0 = rp - .body - mov r30 = in2 - - mov r31 = in3 - mov b7 = loc2 - - rsm psr.i - ;; - mov rp = r8 - br.cond.sptk.many b7 -1: mov psr.l = loc3 - mov ar.rsc = loc4 // restore RSE configuration - mov ar.pfs = loc1 - mov rp = loc0 - ;; - srlz.d // serialize restoration of psr.l - br.ret.sptk.many b0 -END(ia64_pal_call_static) -EXPORT_SYMBOL(ia64_pal_call_static) - -/* - * Make a PAL call using the stacked registers calling convention. - * - * Inputs: - * in0 Index of PAL service - * in2 - in3 Remaining PAL arguments - */ -GLOBAL_ENTRY(ia64_pal_call_stacked) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) - alloc loc1 = ar.pfs,4,4,4,0 - movl loc2 = pal_entry_point - - mov r28 = in0 // Index MUST be copied to r28 - mov out0 = in0 // AND in0 of PAL function - mov loc0 = rp - .body - ;; - ld8 loc2 = [loc2] // loc2 <- entry point - mov out1 = in1 - mov out2 = in2 - mov out3 = in3 - mov loc3 = psr - ;; - rsm psr.i - mov b7 = loc2 - ;; - br.call.sptk.many rp=b7 // now make the call -.ret0: mov psr.l = loc3 - mov ar.pfs = loc1 - mov rp = loc0 - ;; - srlz.d // serialize restoration of psr.l - br.ret.sptk.many b0 -END(ia64_pal_call_stacked) -EXPORT_SYMBOL(ia64_pal_call_stacked) - -/* - * Make a physical mode PAL call using the static registers calling convention. - * - * Inputs: - * in0 Index of PAL service - * in2 - in3 Remaining PAL arguments - * - * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel. - * So we don't need to clear them. 
- */ -#define PAL_PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT |\ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) - -#define PAL_PSR_BITS_TO_SET \ - (IA64_PSR_BN) - - -GLOBAL_ENTRY(ia64_pal_call_phys_static) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) - alloc loc1 = ar.pfs,4,7,0,0 - movl loc2 = pal_entry_point -1: { - mov r28 = in0 // copy procedure index - mov r8 = ip // save ip to compute branch - mov loc0 = rp // save rp - } - .body - ;; - ld8 loc2 = [loc2] // loc2 <- entry point - mov r29 = in1 // first argument - mov r30 = in2 // copy arg2 - mov r31 = in3 // copy arg3 - ;; - mov loc3 = psr // save psr - adds r8 = 1f-1b,r8 // calculate return address for call - ;; - mov loc4=ar.rsc // save RSE configuration - dep.z loc2=loc2,0,61 // convert pal entry point to physical - tpa r8=r8 // convert rp to physical - ;; - mov b7 = loc2 // install target to branch reg - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - movl r16=PAL_PSR_BITS_TO_CLEAR - movl r17=PAL_PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 // add in psr the bits to set - ;; - andcm r16=loc3,r16 // removes bits to clear from psr - br.call.sptk.many rp=ia64_switch_mode_phys - mov rp = r8 // install return address (physical) - mov loc5 = r19 - mov loc6 = r20 - br.cond.sptk.many b7 -1: - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 // r16= original psr - mov r19=loc5 - mov r20=loc6 - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode - mov psr.l = loc3 // restore init PSR - - mov ar.pfs = loc1 - mov rp = loc0 - ;; - mov ar.rsc=loc4 // restore RSE configuration - srlz.d // serialize restoration of psr.l - br.ret.sptk.many b0 -END(ia64_pal_call_phys_static) -EXPORT_SYMBOL(ia64_pal_call_phys_static) - -/* - * Make a PAL call using the stacked registers in physical mode. 
- * - * Inputs: - * in0 Index of PAL service - * in2 - in3 Remaining PAL arguments - */ -GLOBAL_ENTRY(ia64_pal_call_phys_stacked) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) - alloc loc1 = ar.pfs,5,7,4,0 - movl loc2 = pal_entry_point -1: { - mov r28 = in0 // copy procedure index - mov loc0 = rp // save rp - } - .body - ;; - ld8 loc2 = [loc2] // loc2 <- entry point - mov loc3 = psr // save psr - ;; - mov loc4=ar.rsc // save RSE configuration - dep.z loc2=loc2,0,61 // convert pal entry point to physical - ;; - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - movl r16=PAL_PSR_BITS_TO_CLEAR - movl r17=PAL_PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 // add in psr the bits to set - mov b7 = loc2 // install target to branch reg - ;; - andcm r16=loc3,r16 // removes bits to clear from psr - br.call.sptk.many rp=ia64_switch_mode_phys - - mov out0 = in0 // first argument - mov out1 = in1 // copy arg2 - mov out2 = in2 // copy arg3 - mov out3 = in3 // copy arg3 - mov loc5 = r19 - mov loc6 = r20 - - br.call.sptk.many rp=b7 // now make the call - - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 // r16= original psr - mov r19=loc5 - mov r20=loc6 - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode - - mov psr.l = loc3 // restore init PSR - mov ar.pfs = loc1 - mov rp = loc0 - ;; - mov ar.rsc=loc4 // restore RSE configuration - srlz.d // serialize restoration of psr.l - br.ret.sptk.many b0 -END(ia64_pal_call_phys_stacked) -EXPORT_SYMBOL(ia64_pal_call_phys_stacked) - -/* - * Save scratch fp scratch regs which aren't saved in pt_regs already - * (fp10-fp15). - * - * NOTE: We need to do this since firmware (SAL and PAL) may use any of the - * scratch regs fp-low partition. 
- * - * Inputs: - * in0 Address of stack storage for fp regs - */ -GLOBAL_ENTRY(ia64_save_scratch_fpregs) - alloc r3=ar.pfs,1,0,0,0 - add r2=16,in0 - ;; - stf.spill [in0] = f10,32 - stf.spill [r2] = f11,32 - ;; - stf.spill [in0] = f12,32 - stf.spill [r2] = f13,32 - ;; - stf.spill [in0] = f14,32 - stf.spill [r2] = f15,32 - br.ret.sptk.many rp -END(ia64_save_scratch_fpregs) -EXPORT_SYMBOL(ia64_save_scratch_fpregs) - -/* - * Load scratch fp scratch regs (fp10-fp15) - * - * Inputs: - * in0 Address of stack storage for fp regs - */ -GLOBAL_ENTRY(ia64_load_scratch_fpregs) - alloc r3=ar.pfs,1,0,0,0 - add r2=16,in0 - ;; - ldf.fill f10 = [in0],32 - ldf.fill f11 = [r2],32 - ;; - ldf.fill f12 = [in0],32 - ldf.fill f13 = [r2],32 - ;; - ldf.fill f14 = [in0],32 - ldf.fill f15 = [r2],32 - br.ret.sptk.many rp -END(ia64_load_scratch_fpregs) -EXPORT_SYMBOL(ia64_load_scratch_fpregs) diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c deleted file mode 100644 index b9ae093bfe37..000000000000 --- a/arch/ia64/kernel/palinfo.c +++ /dev/null @@ -1,942 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * palinfo.c - * - * Prints processor specific information reported by PAL. - * This code is based on specification of PAL as of the - * Intel IA-64 Architecture Software Developer's Manual v1.0. 
- * - * - * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co - * Stephane Eranian - * Copyright (C) 2004 Intel Corporation - * Ashok Raj - * - * 05/26/2000 S.Eranian initial release - * 08/21/2000 S.Eranian updated to July 2000 PAL specs - * 02/05/2001 S.Eranian fixed module support - * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes - * 03/24/2004 Ashok Raj updated to work with CPU Hotplug - * 10/26/2006 Russ Anderson updated processor features to rev 2.2 spec - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -MODULE_AUTHOR("Stephane Eranian "); -MODULE_DESCRIPTION("/proc interface to IA-64 PAL"); -MODULE_LICENSE("GPL"); - -#define PALINFO_VERSION "0.5" - -typedef int (*palinfo_func_t)(struct seq_file *); - -typedef struct { - const char *name; /* name of the proc entry */ - palinfo_func_t proc_read; /* function to call for reading */ - struct proc_dir_entry *entry; /* registered entry (removal) */ -} palinfo_entry_t; - - -/* - * A bunch of string array to get pretty printing - */ - -static const char *cache_types[] = { - "", /* not used */ - "Instruction", - "Data", - "Data/Instruction" /* unified */ -}; - -static const char *cache_mattrib[]={ - "WriteThrough", - "WriteBack", - "", /* reserved */ - "" /* reserved */ -}; - -static const char *cache_st_hints[]={ - "Temporal, level 1", - "Reserved", - "Reserved", - "Non-temporal, all levels", - "Reserved", - "Reserved", - "Reserved", - "Reserved" -}; - -static const char *cache_ld_hints[]={ - "Temporal, level 1", - "Non-temporal, level 1", - "Reserved", - "Non-temporal, all levels", - "Reserved", - "Reserved", - "Reserved", - "Reserved" -}; - -static const char *rse_hints[]={ - "enforced lazy", - "eager stores", - "eager loads", - "eager loads and stores" -}; - -#define RSE_HINTS_COUNT ARRAY_SIZE(rse_hints) - -static const char *mem_attrib[]={ - "WB", /* 000 */ - "SW", /* 001 */ - "010", /* 
010 */ - "011", /* 011 */ - "UC", /* 100 */ - "UCE", /* 101 */ - "WC", /* 110 */ - "NaTPage" /* 111 */ -}; - -/* - * Take a 64bit vector and produces a string such that - * if bit n is set then 2^n in clear text is generated. The adjustment - * to the right unit is also done. - * - * Input: - * - a pointer to a buffer to hold the string - * - a 64-bit vector - * Output: - * - a pointer to the end of the buffer - * - */ -static void bitvector_process(struct seq_file *m, u64 vector) -{ - int i,j; - static const char *units[]={ "", "K", "M", "G", "T" }; - - for (i=0, j=0; i < 64; i++ , j=i/10) { - if (vector & 0x1) - seq_printf(m, "%d%s ", 1 << (i-j*10), units[j]); - vector >>= 1; - } -} - -/* - * Take a 64bit vector and produces a string such that - * if bit n is set then register n is present. The function - * takes into account consecutive registers and prints out ranges. - * - * Input: - * - a pointer to a buffer to hold the string - * - a 64-bit vector - * Ouput: - * - a pointer to the end of the buffer - * - */ -static void bitregister_process(struct seq_file *m, u64 *reg_info, int max) -{ - int i, begin, skip = 0; - u64 value = reg_info[0]; - - value >>= i = begin = ffs(value) - 1; - - for(; i < max; i++ ) { - - if (i != 0 && (i%64) == 0) value = *++reg_info; - - if ((value & 0x1) == 0 && skip == 0) { - if (begin <= i - 2) - seq_printf(m, "%d-%d ", begin, i-1); - else - seq_printf(m, "%d ", i-1); - skip = 1; - begin = -1; - } else if ((value & 0x1) && skip == 1) { - skip = 0; - begin = i; - } - value >>=1; - } - if (begin > -1) { - if (begin < 127) - seq_printf(m, "%d-127", begin); - else - seq_puts(m, "127"); - } -} - -static int power_info(struct seq_file *m) -{ - s64 status; - u64 halt_info_buffer[8]; - pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer; - int i; - - status = ia64_pal_halt_info(halt_info); - if (status != 0) return 0; - - for (i=0; i < 8 ; i++ ) { - if (halt_info[i].pal_power_mgmt_info_s.im == 1) { - 
seq_printf(m, - "Power level %d:\n" - "\tentry_latency : %d cycles\n" - "\texit_latency : %d cycles\n" - "\tpower consumption : %d mW\n" - "\tCache+TLB coherency : %s\n", i, - halt_info[i].pal_power_mgmt_info_s.entry_latency, - halt_info[i].pal_power_mgmt_info_s.exit_latency, - halt_info[i].pal_power_mgmt_info_s.power_consumption, - halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No"); - } else { - seq_printf(m,"Power level %d: not implemented\n", i); - } - } - return 0; -} - -static int cache_info(struct seq_file *m) -{ - unsigned long i, levels, unique_caches; - pal_cache_config_info_t cci; - int j, k; - long status; - - if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { - printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); - return 0; - } - - seq_printf(m, "Cache levels : %ld\nUnique caches : %ld\n\n", - levels, unique_caches); - - for (i=0; i < levels; i++) { - for (j=2; j >0 ; j--) { - /* even without unification some level may not be present */ - if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) - continue; - - seq_printf(m, - "%s Cache level %lu:\n" - "\tSize : %u bytes\n" - "\tAttributes : ", - cache_types[j+cci.pcci_unified], i+1, - cci.pcci_cache_size); - - if (cci.pcci_unified) - seq_puts(m, "Unified "); - - seq_printf(m, "%s\n", cache_mattrib[cci.pcci_cache_attr]); - - seq_printf(m, - "\tAssociativity : %d\n" - "\tLine size : %d bytes\n" - "\tStride : %d bytes\n", - cci.pcci_assoc, - 1<>=1; - } - seq_puts(m, "\n\tLoad hints : "); - - for(k=0; k < 8; k++ ) { - if (cci.pcci_ld_hints & 0x1) - seq_printf(m, "[%s]", cache_ld_hints[k]); - cci.pcci_ld_hints >>=1; - } - seq_printf(m, - "\n\tAlias boundary : %d byte(s)\n" - "\tTag LSB : %d\n" - "\tTag MSB : %d\n", - 1<0 ; j--) { - tc_pages = 0; /* just in case */ - - /* even without unification, some levels may not be present */ - if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) - continue; - - seq_printf(m, - "\n%s Translation Cache Level %d:\n" - "\tHash sets : 
%d\n" - "\tAssociativity : %d\n" - "\tNumber of entries : %d\n" - "\tFlags : ", - cache_types[j+tc_info.tc_unified], i+1, - tc_info.tc_num_sets, - tc_info.tc_associativity, - tc_info.tc_num_entries); - - if (tc_info.tc_pf) - seq_puts(m, "PreferredPageSizeOptimized "); - if (tc_info.tc_unified) - seq_puts(m, "Unified "); - if (tc_info.tc_reduce_tr) - seq_puts(m, "TCReduction"); - - seq_puts(m, "\n\tSupported page sizes: "); - - bitvector_process(m, tc_pages); - - /* when unified date (j=2) is enough */ - if (tc_info.tc_unified) - break; - } - } - } - - seq_putc(m, '\n'); - return 0; -} - - -static int register_info(struct seq_file *m) -{ - u64 reg_info[2]; - u64 info; - unsigned long phys_stacked; - pal_hints_u_t hints; - unsigned long iregs, dregs; - static const char * const info_type[] = { - "Implemented AR(s)", - "AR(s) with read side-effects", - "Implemented CR(s)", - "CR(s) with read side-effects", - }; - - for(info=0; info < 4; info++) { - if (ia64_pal_register_info(info, ®_info[0], ®_info[1]) != 0) - return 0; - seq_printf(m, "%-32s : ", info_type[info]); - bitregister_process(m, reg_info, 128); - seq_putc(m, '\n'); - } - - if (ia64_pal_rse_info(&phys_stacked, &hints) == 0) - seq_printf(m, - "RSE stacked physical registers : %ld\n" - "RSE load/store hints : %ld (%s)\n", - phys_stacked, hints.ph_data, - hints.ph_data < RSE_HINTS_COUNT ? 
rse_hints[hints.ph_data]: "(??)"); - - if (ia64_pal_debug_info(&iregs, &dregs)) - return 0; - - seq_printf(m, - "Instruction debug register pairs : %ld\n" - "Data debug register pairs : %ld\n", iregs, dregs); - - return 0; -} - -static const char *const proc_features_0[]={ /* Feature set 0 */ - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL, - "Unimplemented instruction address fault", - "INIT, PMI, and LINT pins", - "Simple unimplemented instr addresses", - "Variable P-state performance", - "Virtual machine features implemented", - "XIP,XPSR,XFS implemented", - "XR1-XR3 implemented", - "Disable dynamic predicate prediction", - "Disable processor physical number", - "Disable dynamic data cache prefetch", - "Disable dynamic inst cache prefetch", - "Disable dynamic branch prediction", - NULL, NULL, NULL, NULL, - "Disable P-states", - "Enable MCA on Data Poisoning", - "Enable vmsw instruction", - "Enable extern environmental notification", - "Disable BINIT on processor time-out", - "Disable dynamic power management (DPM)", - "Disable coherency", - "Disable cache", - "Enable CMCI promotion", - "Enable MCA to BINIT promotion", - "Enable MCA promotion", - "Enable BERR promotion" -}; - -static const char *const proc_features_16[]={ /* Feature set 16 */ - "Disable ETM", - "Enable ETM", - "Enable MCA on half-way timer", - "Enable snoop WC", - NULL, - "Enable Fast Deferral", - "Disable MCA on memory aliasing", - "Enable RSB", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - "DP system processor", - "Low Voltage", - "HT supported", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL 
-}; - -static const char *const *const proc_features[]={ - proc_features_0, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - proc_features_16, - NULL, NULL, NULL, NULL, -}; - -static void feature_set_info(struct seq_file *m, u64 avail, u64 status, u64 control, - unsigned long set) -{ - const char *const *vf, *const *v; - int i; - - vf = v = proc_features[set]; - for(i=0; i < 64; i++, avail >>=1, status >>=1, control >>=1) { - - if (!(control)) /* No remaining bits set */ - break; - if (!(avail & 0x1)) /* Print only bits that are available */ - continue; - if (vf) - v = vf + i; - if ( v && *v ) { - seq_printf(m, "%-40s : %s %s\n", *v, - avail & 0x1 ? (status & 0x1 ? - "On " : "Off"): "", - avail & 0x1 ? (control & 0x1 ? - "Ctrl" : "NoCtrl"): ""); - } else { - seq_printf(m, "Feature set %2ld bit %2d\t\t\t" - " : %s %s\n", - set, i, - avail & 0x1 ? (status & 0x1 ? - "On " : "Off"): "", - avail & 0x1 ? (control & 0x1 ? - "Ctrl" : "NoCtrl"): ""); - } - } -} - -static int processor_info(struct seq_file *m) -{ - u64 avail=1, status=1, control=1, feature_set=0; - s64 ret; - - do { - ret = ia64_pal_proc_get_features(&avail, &status, &control, - feature_set); - if (ret < 0) - return 0; - - if (ret == 1) { - feature_set++; - continue; - } - - feature_set_info(m, avail, status, control, feature_set); - feature_set++; - } while(1); - - return 0; -} - -static const char *const bus_features[]={ - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - NULL,NULL, - "Request Bus Parking", - "Bus Lock Mask", - "Enable Half Transfer", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - "Enable Cache Line Repl. Shared", - "Enable Cache Line Repl. 
Exclusive", - "Disable Transaction Queuing", - "Disable Response Error Checking", - "Disable Bus Error Checking", - "Disable Bus Requester Internal Error Signalling", - "Disable Bus Requester Error Signalling", - "Disable Bus Initialization Event Checking", - "Disable Bus Initialization Event Signalling", - "Disable Bus Address Error Checking", - "Disable Bus Address Error Signalling", - "Disable Bus Data Error Checking" -}; - - -static int bus_info(struct seq_file *m) -{ - const char *const *v = bus_features; - pal_bus_features_u_t av, st, ct; - u64 avail, status, control; - int i; - s64 ret; - - if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) - return 0; - - avail = av.pal_bus_features_val; - status = st.pal_bus_features_val; - control = ct.pal_bus_features_val; - - for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) { - if ( ! *v ) - continue; - seq_printf(m, "%-48s : %s%s %s\n", *v, - avail & 0x1 ? "" : "NotImpl", - avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", - avail & 0x1 ? (control & 0x1 ? 
"Ctrl" : "NoCtrl"): ""); - } - return 0; -} - -static int version_info(struct seq_file *m) -{ - pal_version_u_t min_ver, cur_ver; - - if (ia64_pal_version(&min_ver, &cur_ver) != 0) - return 0; - - seq_printf(m, - "PAL_vendor : 0x%02x (min=0x%02x)\n" - "PAL_A : %02x.%02x (min=%02x.%02x)\n" - "PAL_B : %02x.%02x (min=%02x.%02x)\n", - cur_ver.pal_version_s.pv_pal_vendor, - min_ver.pal_version_s.pv_pal_vendor, - cur_ver.pal_version_s.pv_pal_a_model, - cur_ver.pal_version_s.pv_pal_a_rev, - min_ver.pal_version_s.pv_pal_a_model, - min_ver.pal_version_s.pv_pal_a_rev, - cur_ver.pal_version_s.pv_pal_b_model, - cur_ver.pal_version_s.pv_pal_b_rev, - min_ver.pal_version_s.pv_pal_b_model, - min_ver.pal_version_s.pv_pal_b_rev); - return 0; -} - -static int frequency_info(struct seq_file *m) -{ - struct pal_freq_ratio proc, itc, bus; - unsigned long base; - - if (ia64_pal_freq_base(&base) == -1) - seq_puts(m, "Output clock : not implemented\n"); - else - seq_printf(m, "Output clock : %ld ticks/s\n", base); - - if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0; - - seq_printf(m, - "Processor/Clock ratio : %d/%d\n" - "Bus/Clock ratio : %d/%d\n" - "ITC/Clock ratio : %d/%d\n", - proc.num, proc.den, bus.num, bus.den, itc.num, itc.den); - return 0; -} - -static int tr_info(struct seq_file *m) -{ - long status; - pal_tr_valid_u_t tr_valid; - u64 tr_buffer[4]; - pal_vm_info_1_u_t vm_info_1; - pal_vm_info_2_u_t vm_info_2; - unsigned long i, j; - unsigned long max[3], pgm; - struct ifa_reg { - unsigned long valid:1; - unsigned long ig:11; - unsigned long vpn:52; - } *ifa_reg; - struct itir_reg { - unsigned long rv1:2; - unsigned long ps:6; - unsigned long key:24; - unsigned long rv2:32; - } *itir_reg; - struct gr_reg { - unsigned long p:1; - unsigned long rv1:1; - unsigned long ma:3; - unsigned long a:1; - unsigned long d:1; - unsigned long pl:2; - unsigned long ar:3; - unsigned long ppn:38; - unsigned long rv2:2; - unsigned long ed:1; - unsigned long ig:11; - } *gr_reg; - struct 
rid_reg { - unsigned long ig1:1; - unsigned long rv1:1; - unsigned long ig2:6; - unsigned long rid:24; - unsigned long rv2:32; - } *rid_reg; - - if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { - printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); - return 0; - } - max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; - max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; - - for (i=0; i < 2; i++ ) { - for (j=0; j < max[i]; j++) { - - status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid); - if (status != 0) { - printk(KERN_ERR "palinfo: pal call failed on tr[%lu:%lu]=%ld\n", - i, j, status); - continue; - } - - ifa_reg = (struct ifa_reg *)&tr_buffer[2]; - - if (ifa_reg->valid == 0) - continue; - - gr_reg = (struct gr_reg *)tr_buffer; - itir_reg = (struct itir_reg *)&tr_buffer[1]; - rid_reg = (struct rid_reg *)&tr_buffer[3]; - - pgm = -1 << (itir_reg->ps - 12); - seq_printf(m, - "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n" - "\tppn : 0x%lx\n" - "\tvpn : 0x%lx\n" - "\tps : ", - "ID"[i], j, - tr_valid.pal_tr_valid_s.access_rights_valid, - tr_valid.pal_tr_valid_s.priv_level_valid, - tr_valid.pal_tr_valid_s.dirty_bit_valid, - tr_valid.pal_tr_valid_s.mem_attr_valid, - (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12); - - bitvector_process(m, 1<< itir_reg->ps); - - seq_printf(m, - "\n\tpl : %d\n" - "\tar : %d\n" - "\trid : %x\n" - "\tp : %d\n" - "\tma : %d\n" - "\td : %d\n", - gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma, - gr_reg->d); - } - } - return 0; -} - - - -/* - * List {name,function} pairs for every entry in /proc/palinfo/cpu* - */ -static const palinfo_entry_t palinfo_entries[]={ - { "version_info", version_info, }, - { "vm_info", vm_info, }, - { "cache_info", cache_info, }, - { "power_info", power_info, }, - { "register_info", register_info, }, - { "processor_info", processor_info, }, - { "frequency_info", frequency_info, }, - { "bus_info", bus_info }, - { "tr_info", tr_info, } -}; - -#define NR_PALINFO_ENTRIES (int) 
ARRAY_SIZE(palinfo_entries) - -static struct proc_dir_entry *palinfo_dir; - -/* - * This data structure is used to pass which cpu,function is being requested - * It must fit in a 64bit quantity to be passed to the proc callback routine - * - * In SMP mode, when we get a request for another CPU, we must call that - * other CPU using IPI and wait for the result before returning. - */ -typedef union { - u64 value; - struct { - unsigned req_cpu: 32; /* for which CPU this info is */ - unsigned func_id: 32; /* which function is requested */ - } pal_func_cpu; -} pal_func_cpu_u_t; - -#define req_cpu pal_func_cpu.req_cpu -#define func_id pal_func_cpu.func_id - -#ifdef CONFIG_SMP - -/* - * used to hold information about final function to call - */ -typedef struct { - palinfo_func_t func; /* pointer to function to call */ - struct seq_file *m; /* buffer to store results */ - int ret; /* return value from call */ -} palinfo_smp_data_t; - - -/* - * this function does the actual final call and he called - * from the smp code, i.e., this is the palinfo callback routine - */ -static void -palinfo_smp_call(void *info) -{ - palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; - data->ret = (*data->func)(data->m); -} - -/* - * function called to trigger the IPI, we need to access a remote CPU - * Return: - * 0 : error or nothing to output - * otherwise how many bytes in the "page" buffer were written - */ -static -int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f) -{ - palinfo_smp_data_t ptr; - int ret; - - ptr.func = palinfo_entries[f->func_id].proc_read; - ptr.m = m; - ptr.ret = 0; /* just in case */ - - - /* will send IPI to other CPU and wait for completion of remote call */ - if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 1))) { - printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: " - "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret); - return 0; - } - return ptr.ret; -} -#else /* ! 
CONFIG_SMP */ -static -int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f) -{ - printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n"); - return 0; -} -#endif /* CONFIG_SMP */ - -/* - * Entry point routine: all calls go through this function - */ -static int proc_palinfo_show(struct seq_file *m, void *v) -{ - pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&m->private; - - /* - * in SMP mode, we may need to call another CPU to get correct - * information. PAL, by definition, is processor specific - */ - if (f->req_cpu == get_cpu()) - (*palinfo_entries[f->func_id].proc_read)(m); - else - palinfo_handle_smp(m, f); - - put_cpu(); - return 0; -} - -static int palinfo_add_proc(unsigned int cpu) -{ - pal_func_cpu_u_t f; - struct proc_dir_entry *cpu_dir; - int j; - char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ - sprintf(cpustr, "cpu%d", cpu); - - cpu_dir = proc_mkdir(cpustr, palinfo_dir); - if (!cpu_dir) - return -EINVAL; - - f.req_cpu = cpu; - - for (j=0; j < NR_PALINFO_ENTRIES; j++) { - f.func_id = j; - proc_create_single_data(palinfo_entries[j].name, 0, cpu_dir, - proc_palinfo_show, (void *)f.value); - } - return 0; -} - -static int palinfo_del_proc(unsigned int hcpu) -{ - char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ - - sprintf(cpustr, "cpu%d", hcpu); - remove_proc_subtree(cpustr, palinfo_dir); - return 0; -} - -static enum cpuhp_state hp_online; - -static int __init palinfo_init(void) -{ - int i = 0; - - printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); - palinfo_dir = proc_mkdir("pal", NULL); - if (!palinfo_dir) - return -ENOMEM; - - i = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/palinfo:online", - palinfo_add_proc, palinfo_del_proc); - if (i < 0) { - remove_proc_subtree("pal", NULL); - return i; - } - hp_online = i; - return 0; -} - -static void __exit palinfo_exit(void) -{ - cpuhp_remove_state(hp_online); - remove_proc_subtree("pal", NULL); -} - -module_init(palinfo_init); 
-module_exit(palinfo_exit); diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c deleted file mode 100644 index 7f21a8c57ed7..000000000000 --- a/arch/ia64/kernel/patch.c +++ /dev/null @@ -1,237 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Instruction-patching support. - * - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang - */ -#include -#include - -#include -#include -#include -#include - -/* - * This was adapted from code written by Tony Luck: - * - * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle - * like this: - * - * 6 6 5 4 3 2 1 - * 3210987654321098765432109876543210987654321098765432109876543210 - * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG - * - * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB - */ -static u64 -get_imm64 (u64 insn_addr) -{ - u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */ - - return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/ - ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/ - ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/ - ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/ - ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/ - ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/ - ((p[1] & 0x000007f000000000UL) >> 36); /*G*/ -} - -/* Patch instruction with "val" where "mask" has 1 bits. 
*/ -void -ia64_patch (u64 insn_addr, u64 mask, u64 val) -{ - u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16); -# define insn_mask ((1UL << 41) - 1) - unsigned long shift; - - b0 = b[0]; b1 = b[1]; - shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */ - if (shift >= 64) { - m1 = mask << (shift - 64); - v1 = val << (shift - 64); - } else { - m0 = mask << shift; m1 = mask >> (64 - shift); - v0 = val << shift; v1 = val >> (64 - shift); - b[0] = (b0 & ~m0) | (v0 & m0); - } - b[1] = (b1 & ~m1) | (v1 & m1); -} - -void -ia64_patch_imm64 (u64 insn_addr, u64 val) -{ - /* The assembler may generate offset pointing to either slot 1 - or slot 2 for a long (2-slot) instruction, occupying slots 1 - and 2. */ - insn_addr &= -16UL; - ia64_patch(insn_addr + 2, - 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */ - | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */ - | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */ - | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */ - | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)); - ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22); -} - -void -ia64_patch_imm60 (u64 insn_addr, u64 val) -{ - /* The assembler may generate offset pointing to either slot 1 - or slot 2 for a long (2-slot) instruction, occupying slots 1 - and 2. */ - insn_addr &= -16UL; - ia64_patch(insn_addr + 2, - 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */ - | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */)); - ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18); -} - -/* - * We need sometimes to load the physical address of a kernel - * object. Often we can convert the virtual address to physical - * at execution time, but sometimes (either for performance reasons - * or during error recovery) we cannot to this. Patch the marked - * bundles to load the physical address. 
- */ -void __init -ia64_patch_vtop (unsigned long start, unsigned long end) -{ - s32 *offp = (s32 *) start; - u64 ip; - - while (offp < (s32 *) end) { - ip = (u64) offp + *offp; - - /* replace virtual address with corresponding physical address: */ - ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip))); - ia64_fc((void *) ip); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -/* - * Disable the RSE workaround by turning the conditional branch - * that we tagged in each place the workaround was used into an - * unconditional branch. - */ -void __init -ia64_patch_rse (unsigned long start, unsigned long end) -{ - s32 *offp = (s32 *) start; - u64 ip, *b; - - while (offp < (s32 *) end) { - ip = (u64) offp + *offp; - - b = (u64 *)(ip & -16); - b[1] &= ~0xf800000L; - ia64_fc((void *) ip); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -void __init -ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) -{ - static int first_time = 1; - int need_workaround; - s32 *offp = (s32 *) start; - u64 *wp; - - need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0); - - if (first_time) { - first_time = 0; - if (need_workaround) - printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n"); - } - if (need_workaround) - return; - - while (offp < (s32 *) end) { - wp = (u64 *) ia64_imva((char *) offp + *offp); - wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ - wp[1] = 0x0084006880000200UL; - wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ - wp[3] = 0x0004000000000200UL; - ia64_fc(wp); ia64_fc(wp + 2); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -static void __init -patch_fsyscall_table (unsigned long start, unsigned long end) -{ - extern unsigned long fsyscall_table[NR_syscalls]; - s32 *offp = (s32 *) start; - u64 ip; - - while (offp < (s32 *) end) { - ip = (u64) ia64_imva((char *) offp + *offp); - ia64_patch_imm64(ip, (u64) fsyscall_table); - ia64_fc((void *) ip); - ++offp; - } - 
ia64_sync_i(); - ia64_srlz_i(); -} - -static void __init -patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) -{ - extern char fsys_bubble_down[]; - s32 *offp = (s32 *) start; - u64 ip; - - while (offp < (s32 *) end) { - ip = (u64) offp + *offp; - ia64_patch_imm60((u64) ia64_imva((void *) ip), - (u64) (fsys_bubble_down - (ip & -16)) / 16); - ia64_fc((void *) ip); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} - -void __init -ia64_patch_gate (void) -{ -# define START(name) ((unsigned long) __start_gate_##name##_patchlist) -# define END(name) ((unsigned long)__end_gate_##name##_patchlist) - - patch_fsyscall_table(START(fsyscall), END(fsyscall)); - patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); - ia64_patch_vtop(START(vtop), END(vtop)); - ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); -} - -void ia64_patch_phys_stack_reg(unsigned long val) -{ - s32 * offp = (s32 *) __start___phys_stack_reg_patchlist; - s32 * end = (s32 *) __end___phys_stack_reg_patchlist; - u64 ip, mask, imm; - - /* see instruction format A4: adds r1 = imm13, r3 */ - mask = (0x3fUL << 27) | (0x7f << 13); - imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13; - - while (offp < end) { - ip = (u64) offp + *offp; - ia64_patch(ip, mask, imm); - ia64_fc((void *)ip); - ++offp; - } - ia64_sync_i(); - ia64_srlz_i(); -} diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c deleted file mode 100644 index c90221733c6b..000000000000 --- a/arch/ia64/kernel/pci-dma.c +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Dynamic DMA mapping support. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int no_iommu __read_mostly; -#ifdef CONFIG_IOMMU_DEBUG -int force_iommu __read_mostly = 1; -#else -int force_iommu __read_mostly; -#endif - -static int __init pci_iommu_init(void) -{ - if (iommu_detected) - intel_iommu_init(); - - return 0; -} - -/* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h deleted file mode 100644 index dbd04028aafa..000000000000 --- a/arch/ia64/kernel/perfmon_itanium.h +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file contains the Itanium PMU register description tables - * and pmc checker. - * - * Copyright (C) 2002-2003 Hewlett Packard Co - * Stephane Eranian - */ -static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); - -static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 
0UL}}, -/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ -}; - -static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ -/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, -/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, -/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, -/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, -/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, -/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, -/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, -/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, -/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, 
{RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ -}; - -static int -pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) -{ - int ret; - int is_loaded; - - /* sanitfy check */ - if (ctx == NULL) return -EINVAL; - - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; - - /* - * we must clear the (instruction) debug registers if pmc13.ta bit is cleared - * before they are written (fl_using_dbreg==0) to avoid picking up stale 
information. - */ - if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) { - - DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val)); - - /* don't mix debug with perfmon */ - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; - - /* - * a count of 0 will mark the debug registers as in use and also - * ensure that they are properly cleared. - */ - ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); - if (ret) return ret; - } - - /* - * we must clear the (data) debug registers if pmc11.pt bit is cleared - * before they are written (fl_using_dbreg==0) to avoid picking up stale information. - */ - if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) { - - DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val)); - - /* don't mix debug with perfmon */ - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; - - /* - * a count of 0 will mark the debug registers as in use and also - * ensure that they are properly cleared. - */ - ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); - if (ret) return ret; - } - return 0; -} - -/* - * impl_pmcs, impl_pmds are computed at runtime to minimize errors! - */ -static pmu_config_t pmu_conf_ita={ - .pmu_name = "Itanium", - .pmu_family = 0x7, - .ovfl_val = (1UL << 32) - 1, - .pmd_desc = pfm_ita_pmd_desc, - .pmc_desc = pfm_ita_pmc_desc, - .num_ibrs = 8, - .num_dbrs = 8, - .use_rr_dbregs = 1, /* debug register are use for range retrictions */ -}; - - diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c deleted file mode 100644 index 9a5cd9fad3a9..000000000000 --- a/arch/ia64/kernel/process.c +++ /dev/null @@ -1,611 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Architecture-specific setup. 
- * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * 04/11/17 Ashok Raj Added CPU Hotplug Support - * - * 2005-10-07 Keith Owens - * Add notify_die() hooks. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "entry.h" - -#include "sigframe.h" - -void (*ia64_mark_idle)(int); - -unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; -EXPORT_SYMBOL(boot_option_idle_override); -void (*pm_power_off) (void); -EXPORT_SYMBOL(pm_power_off); - -static void -ia64_do_show_stack (struct unw_frame_info *info, void *arg) -{ - unsigned long ip, sp, bsp; - const char *loglvl = arg; - - printk("%s\nCall Trace:\n", loglvl); - do { - unw_get_ip(info, &ip); - if (ip == 0) - break; - - unw_get_sp(info, &sp); - unw_get_bsp(info, &bsp); - printk("%s [<%016lx>] %pS\n" - " sp=%016lx bsp=%016lx\n", - loglvl, ip, (void *)ip, sp, bsp); - } while (unw_unwind(info) >= 0); -} - -void -show_stack (struct task_struct *task, unsigned long *sp, const char *loglvl) -{ - if (!task) - unw_init_running(ia64_do_show_stack, (void *)loglvl); - else { - struct unw_frame_info info; - - unw_init_from_blocked_task(&info, task); - ia64_do_show_stack(&info, (void *)loglvl); - } -} - -void -show_regs (struct pt_regs *regs) -{ - unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; - - print_modules(); - printk("\n"); - show_regs_print_info(KERN_DEFAULT); - printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", - regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), - init_utsname()->release); - printk("ip is at %pS\n", (void *)ip); - printk("unat: %016lx pfs : %016lx rsc : %016lx\n", - regs->ar_unat, regs->ar_pfs, regs->ar_rsc); 
- printk("rnat: %016lx bsps: %016lx pr : %016lx\n", - regs->ar_rnat, regs->ar_bspstore, regs->pr); - printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", - regs->loadrs, regs->ar_ccv, regs->ar_fpsr); - printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); - printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7); - printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", - regs->f6.u.bits[1], regs->f6.u.bits[0], - regs->f7.u.bits[1], regs->f7.u.bits[0]); - printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", - regs->f8.u.bits[1], regs->f8.u.bits[0], - regs->f9.u.bits[1], regs->f9.u.bits[0]); - printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", - regs->f10.u.bits[1], regs->f10.u.bits[0], - regs->f11.u.bits[1], regs->f11.u.bits[0]); - - printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3); - printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10); - printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13); - printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16); - printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19); - printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22); - printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25); - printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28); - printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31); - - if (user_mode(regs)) { - /* print the stacked registers */ - unsigned long val, *bsp, ndirty; - int i, sof, is_nat = 0; - - sof = regs->cr_ifs & 0x7f; /* size of frame */ - ndirty = (regs->loadrs >> 19); - bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty); - for (i = 0; i < sof; ++i) { - get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i)); - printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? 
'*' : ' ', val, - ((i == sof - 1) || (i % 3) == 2) ? "\n" : " "); - } - } else - show_stack(NULL, NULL, KERN_DEFAULT); -} - -/* local support for deprecated console_print */ -void -console_print(const char *s) -{ - printk(KERN_EMERG "%s", s); -} - -void -do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) -{ - if (fsys_mode(current, &scr->pt)) { - /* - * defer signal-handling etc. until we return to - * privilege-level 0. - */ - if (!ia64_psr(&scr->pt)->lp) - ia64_psr(&scr->pt)->lp = 1; - return; - } - - /* deal with pending signal delivery */ - if (test_thread_flag(TIF_SIGPENDING) || - test_thread_flag(TIF_NOTIFY_SIGNAL)) { - local_irq_enable(); /* force interrupt enable */ - ia64_do_signal(scr, in_syscall); - } - - if (test_thread_flag(TIF_NOTIFY_RESUME)) { - local_irq_enable(); /* force interrupt enable */ - resume_user_mode_work(&scr->pt); - } - - /* copy user rbs to kernel rbs */ - if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) { - local_irq_enable(); /* force interrupt enable */ - ia64_sync_krbs(); - } - - local_irq_disable(); /* force interrupt disable */ -} - -static int __init nohalt_setup(char * str) -{ - cpu_idle_poll_ctrl(true); - return 1; -} -__setup("nohalt", nohalt_setup); - -#ifdef CONFIG_HOTPLUG_CPU -/* We don't actually take CPU down, just spin without interrupts. */ -static inline void __noreturn play_dead(void) -{ - unsigned int this_cpu = smp_processor_id(); - - /* Ack it */ - __this_cpu_write(cpu_state, CPU_DEAD); - - max_xtp(); - local_irq_disable(); - idle_task_exit(); - ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]); - /* - * The above is a point of no-return, the processor is - * expected to be in SAL loop now. 
- */ - BUG(); -} -#else -static inline void __noreturn play_dead(void) -{ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ - -void __noreturn arch_cpu_idle_dead(void) -{ - play_dead(); -} - -void arch_cpu_idle(void) -{ - void (*mark_idle)(int) = ia64_mark_idle; - -#ifdef CONFIG_SMP - min_xtp(); -#endif - rmb(); - if (mark_idle) - (*mark_idle)(1); - - raw_safe_halt(); - raw_local_irq_disable(); - - if (mark_idle) - (*mark_idle)(0); -#ifdef CONFIG_SMP - normal_xtp(); -#endif -} - -void -ia64_save_extra (struct task_struct *task) -{ - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) - ia64_save_debug_regs(&task->thread.dbr[0]); -} - -void -ia64_load_extra (struct task_struct *task) -{ - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) - ia64_load_debug_regs(&task->thread.dbr[0]); -} - -/* - * Copy the state of an ia-64 thread. - * - * We get here through the following call chain: - * - * from user-level: from kernel: - * - * - * sys_clone : - * kernel_clone kernel_clone - * copy_thread copy_thread - * - * This means that the stack layout is as follows: - * - * +---------------------+ (highest addr) - * | struct pt_regs | - * +---------------------+ - * | struct switch_stack | - * +---------------------+ - * | | - * | memory stack | - * | | <-- sp (lowest addr) - * +---------------------+ - * - * Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an - * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register, - * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the - * pt_regs structure in the parent is congruent to that of the child, modulo 512. Since - * the stack is page aligned and the page size is at least 4KB, this is always the case, - * so there is nothing to worry about. 
- */ -int -copy_thread(struct task_struct *p, const struct kernel_clone_args *args) -{ - unsigned long clone_flags = args->flags; - unsigned long user_stack_base = args->stack; - unsigned long user_stack_size = args->stack_size; - unsigned long tls = args->tls; - extern char ia64_ret_from_clone; - struct switch_stack *child_stack, *stack; - unsigned long rbs, child_rbs, rbs_size; - struct pt_regs *child_ptregs; - struct pt_regs *regs = current_pt_regs(); - int retval = 0; - - child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1; - child_stack = (struct switch_stack *) child_ptregs - 1; - - rbs = (unsigned long) current + IA64_RBS_OFFSET; - child_rbs = (unsigned long) p + IA64_RBS_OFFSET; - - /* copy parts of thread_struct: */ - p->thread.ksp = (unsigned long) child_stack - 16; - - /* - * NOTE: The calling convention considers all floating point - * registers in the high partition (fph) to be scratch. Since - * the only way to get to this point is through a system call, - * we know that the values in fph are all dead. Hence, there - * is no need to inherit the fph state from the parent to the - * child and all we have to do is to make sure that - * IA64_THREAD_FPH_VALID is cleared in the child. - * - * XXX We could push this optimization a bit further by - * clearing IA64_THREAD_FPH_VALID on ANY system call. - * However, it's not clear this is worth doing. Also, it - * would be a slight deviation from the normal Linux system - * call behavior where scratch registers are preserved across - * system calls (unless used by the system call itself). 
- */ -# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \ - | IA64_THREAD_PM_VALID) -# define THREAD_FLAGS_TO_SET 0 - p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) - | THREAD_FLAGS_TO_SET); - - ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */ - - if (unlikely(args->fn)) { - if (unlikely(args->idle)) { - /* fork_idle() called us */ - return 0; - } - memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack)); - child_stack->r4 = (unsigned long) args->fn; - child_stack->r5 = (unsigned long) args->fn_arg; - /* - * Preserve PSR bits, except for bits 32-34 and 37-45, - * which we can't read. - */ - child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN; - /* mark as valid, empty frame */ - child_ptregs->cr_ifs = 1UL << 63; - child_stack->ar_fpsr = child_ptregs->ar_fpsr - = ia64_getreg(_IA64_REG_AR_FPSR); - child_stack->pr = (1 << PRED_KERNEL_STACK); - child_stack->ar_bspstore = child_rbs; - child_stack->b0 = (unsigned long) &ia64_ret_from_clone; - - /* stop some PSR bits from being inherited. - * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() - * therefore we must specify them explicitly here and not include them in - * IA64_PSR_BITS_TO_CLEAR. 
- */ - child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) - & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); - - return 0; - } - stack = ((struct switch_stack *) regs) - 1; - /* copy parent's switch_stack & pt_regs to child: */ - memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack)); - - /* copy the parent's register backing store to the child: */ - rbs_size = stack->ar_bspstore - rbs; - memcpy((void *) child_rbs, (void *) rbs, rbs_size); - if (clone_flags & CLONE_SETTLS) - child_ptregs->r13 = tls; - if (user_stack_base) { - child_ptregs->r12 = user_stack_base + user_stack_size - 16; - child_ptregs->ar_bspstore = user_stack_base; - child_ptregs->ar_rnat = 0; - child_ptregs->loadrs = 0; - } - child_stack->ar_bspstore = child_rbs + rbs_size; - child_stack->b0 = (unsigned long) &ia64_ret_from_clone; - - /* stop some PSR bits from being inherited. - * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() - * therefore we must specify them explicitly here and not include them in - * IA64_PSR_BITS_TO_CLEAR. 
- */ - child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) - & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); - return retval; -} - -asmlinkage long ia64_clone(unsigned long clone_flags, unsigned long stack_start, - unsigned long stack_size, unsigned long parent_tidptr, - unsigned long child_tidptr, unsigned long tls) -{ - struct kernel_clone_args args = { - .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), - .pidfd = (int __user *)parent_tidptr, - .child_tid = (int __user *)child_tidptr, - .parent_tid = (int __user *)parent_tidptr, - .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), - .stack = stack_start, - .stack_size = stack_size, - .tls = tls, - }; - - return kernel_clone(&args); -} - -static void -do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg) -{ - unsigned long mask, sp, nat_bits = 0, ar_rnat, urbs_end, cfm; - unsigned long ip; - elf_greg_t *dst = arg; - struct pt_regs *pt; - char nat; - int i; - - memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */ - - if (unw_unwind_to_user(info) < 0) - return; - - unw_get_sp(info, &sp); - pt = (struct pt_regs *) (sp + 16); - - urbs_end = ia64_get_user_rbs_end(task, pt, &cfm); - - if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0) - return; - - ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end), - &ar_rnat); - - /* - * coredump format: - * r0-r31 - * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) - * predicate registers (p0-p63) - * b0-b7 - * ip cfm user-mask - * ar.rsc ar.bsp ar.bspstore ar.rnat - * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec - */ - - /* r0 is zero */ - for (i = 1, mask = (1UL << i); i < 32; ++i) { - unw_get_gr(info, i, &dst[i], &nat); - if (nat) - nat_bits |= mask; - mask <<= 1; - } - dst[32] = nat_bits; - unw_get_pr(info, &dst[33]); - - for (i = 0; i < 8; ++i) - unw_get_br(info, i, &dst[34 + i]); - - unw_get_rp(info, &ip); - dst[42] = ip + 
ia64_psr(pt)->ri; - dst[43] = cfm; - dst[44] = pt->cr_ipsr & IA64_PSR_UM; - - unw_get_ar(info, UNW_AR_RSC, &dst[45]); - /* - * For bsp and bspstore, unw_get_ar() would return the kernel - * addresses, but we need the user-level addresses instead: - */ - dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */ - dst[47] = pt->ar_bspstore; - dst[48] = ar_rnat; - unw_get_ar(info, UNW_AR_CCV, &dst[49]); - unw_get_ar(info, UNW_AR_UNAT, &dst[50]); - unw_get_ar(info, UNW_AR_FPSR, &dst[51]); - dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */ - unw_get_ar(info, UNW_AR_LC, &dst[53]); - unw_get_ar(info, UNW_AR_EC, &dst[54]); - unw_get_ar(info, UNW_AR_CSD, &dst[55]); - unw_get_ar(info, UNW_AR_SSD, &dst[56]); -} - -static void -do_copy_regs (struct unw_frame_info *info, void *arg) -{ - do_copy_task_regs(current, info, arg); -} - -void -ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) -{ - unw_init_running(do_copy_regs, dst); -} - -/* - * Flush thread state. This is called when a thread does an execve(). - */ -void -flush_thread (void) -{ - /* drop floating-point and debug-register state if it exists: */ - current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); - ia64_drop_fpu(current); -} - -/* - * Clean up state associated with a thread. This is called when - * the thread calls exit(). - */ -void -exit_thread (struct task_struct *tsk) -{ - - ia64_drop_fpu(tsk); -} - -unsigned long -__get_wchan (struct task_struct *p) -{ - struct unw_frame_info info; - unsigned long ip; - int count = 0; - - /* - * Note: p may not be a blocked task (it could be current or - * another process running on some other CPU. Rather than - * trying to determine if p is really blocked, we just assume - * it's blocked and rely on the unwind routines to fail - * gracefully if the process wasn't really blocked after all. 
- * --davidm 99/12/15 - */ - unw_init_from_blocked_task(&info, p); - do { - if (task_is_running(p)) - return 0; - if (unw_unwind(&info) < 0) - return 0; - unw_get_ip(&info, &ip); - if (!in_sched_functions(ip)) - return ip; - } while (count++ < 16); - return 0; -} - -void -cpu_halt (void) -{ - pal_power_mgmt_info_u_t power_info[8]; - unsigned long min_power; - int i, min_power_state; - - if (ia64_pal_halt_info(power_info) != 0) - return; - - min_power_state = 0; - min_power = power_info[0].pal_power_mgmt_info_s.power_consumption; - for (i = 1; i < 8; ++i) - if (power_info[i].pal_power_mgmt_info_s.im - && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) { - min_power = power_info[i].pal_power_mgmt_info_s.power_consumption; - min_power_state = i; - } - - while (1) - ia64_pal_halt(min_power_state); -} - -void machine_shutdown(void) -{ - smp_shutdown_nonboot_cpus(reboot_cpu); - -#ifdef CONFIG_KEXEC - kexec_disable_iosapic(); -#endif -} - -void -machine_restart (char *restart_cmd) -{ - (void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0); - efi_reboot(REBOOT_WARM, NULL); -} - -void -machine_halt (void) -{ - (void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0); - cpu_halt(); -} - -void -machine_power_off (void) -{ - do_kernel_power_off(); - machine_halt(); -} - -EXPORT_SYMBOL(ia64_delay_loop); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c deleted file mode 100644 index 4c41912c550f..000000000000 --- a/arch/ia64/kernel/ptrace.c +++ /dev/null @@ -1,2012 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Kernel support for the ptrace() and syscall tracing interfaces. - * - * Copyright (C) 1999-2005 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 2006 Intel Co - * 2006-08-12 - IA64 Native Utrace implementation support added by - * Anil S Keshavamurthy - * - * Derived from the x86 and Alpha versions. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "entry.h" - -/* - * Bits in the PSR that we allow ptrace() to change: - * be, up, ac, mfl, mfh (the user mask; five bits total) - * db (debug breakpoint fault; one bit) - * id (instruction debug fault disable; one bit) - * dd (data debug fault disable; one bit) - * ri (restart instruction; two bits) - * is (instruction set; one bit) - */ -#define IPSR_MASK (IA64_PSR_UM | IA64_PSR_DB | IA64_PSR_IS \ - | IA64_PSR_ID | IA64_PSR_DD | IA64_PSR_RI) - -#define MASK(nbits) ((1UL << (nbits)) - 1) /* mask with NBITS bits set */ -#define PFM_MASK MASK(38) - -#define PTRACE_DEBUG 0 - -#if PTRACE_DEBUG -# define dprintk(format...) printk(format) -# define inline -#else -# define dprintk(format...) -#endif - -/* Return TRUE if PT was created due to kernel-entry via a system-call. */ - -static inline int -in_syscall (struct pt_regs *pt) -{ - return (long) pt->cr_ifs >= 0; -} - -/* - * Collect the NaT bits for r1-r31 from scratch_unat and return a NaT - * bitset where bit i is set iff the NaT bit of register i is set. - */ -unsigned long -ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat) -{ -# define GET_BITS(first, last, unat) \ - ({ \ - unsigned long bit = ia64_unat_pos(&pt->r##first); \ - unsigned long nbits = (last - first + 1); \ - unsigned long mask = MASK(nbits) << first; \ - unsigned long dist; \ - if (bit < first) \ - dist = 64 + bit - first; \ - else \ - dist = bit - first; \ - ia64_rotr(unat, dist) & mask; \ - }) - unsigned long val; - - /* - * Registers that are stored consecutively in struct pt_regs - * can be handled in parallel. If the register order in - * struct_pt_regs changes, this code MUST be updated. 
- */ - val = GET_BITS( 1, 1, scratch_unat); - val |= GET_BITS( 2, 3, scratch_unat); - val |= GET_BITS(12, 13, scratch_unat); - val |= GET_BITS(14, 14, scratch_unat); - val |= GET_BITS(15, 15, scratch_unat); - val |= GET_BITS( 8, 11, scratch_unat); - val |= GET_BITS(16, 31, scratch_unat); - return val; - -# undef GET_BITS -} - -/* - * Set the NaT bits for the scratch registers according to NAT and - * return the resulting unat (assuming the scratch registers are - * stored in PT). - */ -unsigned long -ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat) -{ -# define PUT_BITS(first, last, nat) \ - ({ \ - unsigned long bit = ia64_unat_pos(&pt->r##first); \ - unsigned long nbits = (last - first + 1); \ - unsigned long mask = MASK(nbits) << first; \ - long dist; \ - if (bit < first) \ - dist = 64 + bit - first; \ - else \ - dist = bit - first; \ - ia64_rotl(nat & mask, dist); \ - }) - unsigned long scratch_unat; - - /* - * Registers that are stored consecutively in struct pt_regs - * can be handled in parallel. If the register order in - * struct_pt_regs changes, this code MUST be updated. - */ - scratch_unat = PUT_BITS( 1, 1, nat); - scratch_unat |= PUT_BITS( 2, 3, nat); - scratch_unat |= PUT_BITS(12, 13, nat); - scratch_unat |= PUT_BITS(14, 14, nat); - scratch_unat |= PUT_BITS(15, 15, nat); - scratch_unat |= PUT_BITS( 8, 11, nat); - scratch_unat |= PUT_BITS(16, 31, nat); - - return scratch_unat; - -# undef PUT_BITS -} - -#define IA64_MLX_TEMPLATE 0x2 -#define IA64_MOVL_OPCODE 6 - -void -ia64_increment_ip (struct pt_regs *regs) -{ - unsigned long w0, ri = ia64_psr(regs)->ri + 1; - - if (ri > 2) { - ri = 0; - regs->cr_iip += 16; - } else if (ri == 2) { - get_user(w0, (char __user *) regs->cr_iip + 0); - if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { - /* - * rfi'ing to slot 2 of an MLX bundle causes - * an illegal operation fault. We don't want - * that to happen... 
- */ - ri = 0; - regs->cr_iip += 16; - } - } - ia64_psr(regs)->ri = ri; -} - -void -ia64_decrement_ip (struct pt_regs *regs) -{ - unsigned long w0, ri = ia64_psr(regs)->ri - 1; - - if (ia64_psr(regs)->ri == 0) { - regs->cr_iip -= 16; - ri = 2; - get_user(w0, (char __user *) regs->cr_iip + 0); - if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { - /* - * rfi'ing to slot 2 of an MLX bundle causes - * an illegal operation fault. We don't want - * that to happen... - */ - ri = 1; - } - } - ia64_psr(regs)->ri = ri; -} - -/* - * This routine is used to read an rnat bits that are stored on the - * kernel backing store. Since, in general, the alignment of the user - * and kernel are different, this is not completely trivial. In - * essence, we need to construct the user RNAT based on up to two - * kernel RNAT values and/or the RNAT value saved in the child's - * pt_regs. - * - * user rbs - * - * +--------+ <-- lowest address - * | slot62 | - * +--------+ - * | rnat | 0x....1f8 - * +--------+ - * | slot00 | \ - * +--------+ | - * | slot01 | > child_regs->ar_rnat - * +--------+ | - * | slot02 | / kernel rbs - * +--------+ +--------+ - * <- child_regs->ar_bspstore | slot61 | <-- krbs - * +- - - - + +--------+ - * | slot62 | - * +- - - - + +--------+ - * | rnat | - * +- - - - + +--------+ - * vrnat | slot00 | - * +- - - - + +--------+ - * = = - * +--------+ - * | slot00 | \ - * +--------+ | - * | slot01 | > child_stack->ar_rnat - * +--------+ | - * | slot02 | / - * +--------+ - * <--- child_stack->ar_bspstore - * - * The way to think of this code is as follows: bit 0 in the user rnat - * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat - * value. The kernel rnat value holding this bit is stored in - * variable rnat0. rnat1 is loaded with the kernel rnat value that - * form the upper bits of the user rnat value. 
- * - * Boundary cases: - * - * o when reading the rnat "below" the first rnat slot on the kernel - * backing store, rnat0/rnat1 are set to 0 and the low order bits are - * merged in from pt->ar_rnat. - * - * o when reading the rnat "above" the last rnat slot on the kernel - * backing store, rnat0/rnat1 gets its value from sw->ar_rnat. - */ -static unsigned long -get_rnat (struct task_struct *task, struct switch_stack *sw, - unsigned long *krbs, unsigned long *urnat_addr, - unsigned long *urbs_end) -{ - unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr; - unsigned long umask = 0, mask, m; - unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; - long num_regs, nbits; - struct pt_regs *pt; - - pt = task_pt_regs(task); - kbsp = (unsigned long *) sw->ar_bspstore; - ubspstore = (unsigned long *) pt->ar_bspstore; - - if (urbs_end < urnat_addr) - nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_end); - else - nbits = 63; - mask = MASK(nbits); - /* - * First, figure out which bit number slot 0 in user-land maps - * to in the kernel rnat. Do this by figuring out how many - * register slots we're beyond the user's backingstore and - * then computing the equivalent address in kernel space. 
- */ - num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); - slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); - shift = ia64_rse_slot_num(slot0_kaddr); - rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); - rnat0_kaddr = rnat1_kaddr - 64; - - if (ubspstore + 63 > urnat_addr) { - /* some bits need to be merged in from pt->ar_rnat */ - umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; - urnat = (pt->ar_rnat & umask); - mask &= ~umask; - if (!mask) - return urnat; - } - - m = mask << shift; - if (rnat0_kaddr >= kbsp) - rnat0 = sw->ar_rnat; - else if (rnat0_kaddr > krbs) - rnat0 = *rnat0_kaddr; - urnat |= (rnat0 & m) >> shift; - - m = mask >> (63 - shift); - if (rnat1_kaddr >= kbsp) - rnat1 = sw->ar_rnat; - else if (rnat1_kaddr > krbs) - rnat1 = *rnat1_kaddr; - urnat |= (rnat1 & m) << (63 - shift); - return urnat; -} - -/* - * The reverse of get_rnat. - */ -static void -put_rnat (struct task_struct *task, struct switch_stack *sw, - unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat, - unsigned long *urbs_end) -{ - unsigned long rnat0 = 0, rnat1 = 0, *slot0_kaddr, umask = 0, mask, m; - unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; - long num_regs, nbits; - struct pt_regs *pt; - unsigned long cfm, *urbs_kargs; - - pt = task_pt_regs(task); - kbsp = (unsigned long *) sw->ar_bspstore; - ubspstore = (unsigned long *) pt->ar_bspstore; - - urbs_kargs = urbs_end; - if (in_syscall(pt)) { - /* - * If entered via syscall, don't allow user to set rnat bits - * for syscall args. - */ - cfm = pt->cr_ifs; - urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f)); - } - - if (urbs_kargs >= urnat_addr) - nbits = 63; - else { - if ((urnat_addr - 63) >= urbs_kargs) - return; - nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_kargs); - } - mask = MASK(nbits); - - /* - * First, figure out which bit number slot 0 in user-land maps - * to in the kernel rnat. 
Do this by figuring out how many - * register slots we're beyond the user's backingstore and - * then computing the equivalent address in kernel space. - */ - num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); - slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); - shift = ia64_rse_slot_num(slot0_kaddr); - rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); - rnat0_kaddr = rnat1_kaddr - 64; - - if (ubspstore + 63 > urnat_addr) { - /* some bits need to be place in pt->ar_rnat: */ - umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; - pt->ar_rnat = (pt->ar_rnat & ~umask) | (urnat & umask); - mask &= ~umask; - if (!mask) - return; - } - /* - * Note: Section 11.1 of the EAS guarantees that bit 63 of an - * rnat slot is ignored. so we don't have to clear it here. - */ - rnat0 = (urnat << shift); - m = mask << shift; - if (rnat0_kaddr >= kbsp) - sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat0 & m); - else if (rnat0_kaddr > krbs) - *rnat0_kaddr = ((*rnat0_kaddr & ~m) | (rnat0 & m)); - - rnat1 = (urnat >> (63 - shift)); - m = mask >> (63 - shift); - if (rnat1_kaddr >= kbsp) - sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat1 & m); - else if (rnat1_kaddr > krbs) - *rnat1_kaddr = ((*rnat1_kaddr & ~m) | (rnat1 & m)); -} - -static inline int -on_kernel_rbs (unsigned long addr, unsigned long bspstore, - unsigned long urbs_end) -{ - unsigned long *rnat_addr = ia64_rse_rnat_addr((unsigned long *) - urbs_end); - return (addr >= bspstore && addr <= (unsigned long) rnat_addr); -} - -/* - * Read a word from the user-level backing store of task CHILD. ADDR - * is the user-level address to read the word from, VAL a pointer to - * the return value, and USER_BSP gives the end of the user-level - * backing store (i.e., it's the address that would be in ar.bsp after - * the user executed a "cover" instruction). - * - * This routine takes care of accessing the kernel register backing - * store for those registers that got spilled there. 
It also takes - * care of calculating the appropriate RNaT collection words. - */ -long -ia64_peek (struct task_struct *child, struct switch_stack *child_stack, - unsigned long user_rbs_end, unsigned long addr, long *val) -{ - unsigned long *bspstore, *krbs, regnum, *laddr, *urbs_end, *rnat_addr; - struct pt_regs *child_regs; - size_t copied; - long ret; - - urbs_end = (long *) user_rbs_end; - laddr = (unsigned long *) addr; - child_regs = task_pt_regs(child); - bspstore = (unsigned long *) child_regs->ar_bspstore; - krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; - if (on_kernel_rbs(addr, (unsigned long) bspstore, - (unsigned long) urbs_end)) - { - /* - * Attempt to read the RBS in an area that's actually - * on the kernel RBS => read the corresponding bits in - * the kernel RBS. - */ - rnat_addr = ia64_rse_rnat_addr(laddr); - ret = get_rnat(child, child_stack, krbs, rnat_addr, urbs_end); - - if (laddr == rnat_addr) { - /* return NaT collection word itself */ - *val = ret; - return 0; - } - - if (((1UL << ia64_rse_slot_num(laddr)) & ret) != 0) { - /* - * It is implementation dependent whether the - * data portion of a NaT value gets saved on a - * st8.spill or RSE spill (e.g., see EAS 2.6, - * 4.4.4.6 Register Spill and Fill). To get - * consistent behavior across all possible - * IA-64 implementations, we return zero in - * this case. - */ - *val = 0; - return 0; - } - - if (laddr < urbs_end) { - /* - * The desired word is on the kernel RBS and - * is not a NaT. 
- */ - regnum = ia64_rse_num_regs(bspstore, laddr); - *val = *ia64_rse_skip_regs(krbs, regnum); - return 0; - } - } - copied = access_process_vm(child, addr, &ret, sizeof(ret), FOLL_FORCE); - if (copied != sizeof(ret)) - return -EIO; - *val = ret; - return 0; -} - -long -ia64_poke (struct task_struct *child, struct switch_stack *child_stack, - unsigned long user_rbs_end, unsigned long addr, long val) -{ - unsigned long *bspstore, *krbs, regnum, *laddr; - unsigned long *urbs_end = (long *) user_rbs_end; - struct pt_regs *child_regs; - - laddr = (unsigned long *) addr; - child_regs = task_pt_regs(child); - bspstore = (unsigned long *) child_regs->ar_bspstore; - krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; - if (on_kernel_rbs(addr, (unsigned long) bspstore, - (unsigned long) urbs_end)) - { - /* - * Attempt to write the RBS in an area that's actually - * on the kernel RBS => write the corresponding bits - * in the kernel RBS. - */ - if (ia64_rse_is_rnat_slot(laddr)) - put_rnat(child, child_stack, krbs, laddr, val, - urbs_end); - else { - if (laddr < urbs_end) { - regnum = ia64_rse_num_regs(bspstore, laddr); - *ia64_rse_skip_regs(krbs, regnum) = val; - } - } - } else if (access_process_vm(child, addr, &val, sizeof(val), - FOLL_FORCE | FOLL_WRITE) - != sizeof(val)) - return -EIO; - return 0; -} - -/* - * Calculate the address of the end of the user-level register backing - * store. This is the address that would have been stored in ar.bsp - * if the user had executed a "cover" instruction right before - * entering the kernel. If CFMP is not NULL, it is used to return the - * "current frame mask" that was active at the time the kernel was - * entered. 
- */ -unsigned long -ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, - unsigned long *cfmp) -{ - unsigned long *krbs, *bspstore, cfm = pt->cr_ifs; - long ndirty; - - krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; - bspstore = (unsigned long *) pt->ar_bspstore; - ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); - - if (in_syscall(pt)) - ndirty += (cfm & 0x7f); - else - cfm &= ~(1UL << 63); /* clear valid bit */ - - if (cfmp) - *cfmp = cfm; - return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); -} - -/* - * Synchronize (i.e, write) the RSE backing store living in kernel - * space to the VM of the CHILD task. SW and PT are the pointers to - * the switch_stack and pt_regs structures, respectively. - * USER_RBS_END is the user-level address at which the backing store - * ends. - */ -long -ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw, - unsigned long user_rbs_start, unsigned long user_rbs_end) -{ - unsigned long addr, val; - long ret; - - /* now copy word for word from kernel rbs to user rbs: */ - for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { - ret = ia64_peek(child, sw, user_rbs_end, addr, &val); - if (ret < 0) - return ret; - if (access_process_vm(child, addr, &val, sizeof(val), - FOLL_FORCE | FOLL_WRITE) - != sizeof(val)) - return -EIO; - } - return 0; -} - -static long -ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw, - unsigned long user_rbs_start, unsigned long user_rbs_end) -{ - unsigned long addr, val; - long ret; - - /* now copy word for word from user rbs to kernel rbs: */ - for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { - if (access_process_vm(child, addr, &val, sizeof(val), - FOLL_FORCE) - != sizeof(val)) - return -EIO; - - ret = ia64_poke(child, sw, user_rbs_end, addr, val); - if (ret < 0) - return ret; - } - return 0; -} - -typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *, - unsigned long, unsigned long); - 
-static void do_sync_rbs(struct unw_frame_info *info, void *arg) -{ - struct pt_regs *pt; - unsigned long urbs_end; - syncfunc_t fn = arg; - - if (unw_unwind_to_user(info) < 0) - return; - pt = task_pt_regs(info->task); - urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL); - - fn(info->task, info->sw, pt->ar_bspstore, urbs_end); -} - -/* - * when a thread is stopped (ptraced), debugger might change thread's user - * stack (change memory directly), and we must avoid the RSE stored in kernel - * to override user stack (user space's RSE is newer than kernel's in the - * case). To workaround the issue, we copy kernel RSE to user RSE before the - * task is stopped, so user RSE has updated data. we then copy user RSE to - * kernel after the task is resummed from traced stop and kernel will use the - * newer RSE to return to user. TIF_RESTORE_RSE is the flag to indicate we need - * synchronize user RSE to kernel. - */ -void ia64_ptrace_stop(void) -{ - if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE)) - return; - set_notify_resume(current); - unw_init_running(do_sync_rbs, ia64_sync_user_rbs); -} - -/* - * This is called to read back the register backing store. - */ -void ia64_sync_krbs(void) -{ - clear_tsk_thread_flag(current, TIF_RESTORE_RSE); - - unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs); -} - -/* - * Write f32-f127 back to task->thread.fph if it has been modified. - */ -inline void -ia64_flush_fph (struct task_struct *task) -{ - struct ia64_psr *psr = ia64_psr(task_pt_regs(task)); - - /* - * Prevent migrating this task while - * we're fiddling with the FPU state - */ - preempt_disable(); - if (ia64_is_local_fpu_owner(task) && psr->mfh) { - psr->mfh = 0; - task->thread.flags |= IA64_THREAD_FPH_VALID; - ia64_save_fpu(&task->thread.fph[0]); - } - preempt_enable(); -} - -/* - * Sync the fph state of the task so that it can be manipulated - * through thread.fph. 
If necessary, f32-f127 are written back to - * thread.fph or, if the fph state hasn't been used before, thread.fph - * is cleared to zeroes. Also, access to f32-f127 is disabled to - * ensure that the task picks up the state from thread.fph when it - * executes again. - */ -void -ia64_sync_fph (struct task_struct *task) -{ - struct ia64_psr *psr = ia64_psr(task_pt_regs(task)); - - ia64_flush_fph(task); - if (!(task->thread.flags & IA64_THREAD_FPH_VALID)) { - task->thread.flags |= IA64_THREAD_FPH_VALID; - memset(&task->thread.fph, 0, sizeof(task->thread.fph)); - } - ia64_drop_fpu(task); - psr->dfh = 1; -} - -/* - * Change the machine-state of CHILD such that it will return via the normal - * kernel exit-path, rather than the syscall-exit path. - */ -static void -convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt, - unsigned long cfm) -{ - struct unw_frame_info info, prev_info; - unsigned long ip, sp, pr; - - unw_init_from_blocked_task(&info, child); - while (1) { - prev_info = info; - if (unw_unwind(&info) < 0) - return; - - unw_get_sp(&info, &sp); - if ((long)((unsigned long)child + IA64_STK_OFFSET - sp) - < IA64_PT_REGS_SIZE) { - dprintk("ptrace.%s: ran off the top of the kernel " - "stack\n", __func__); - return; - } - if (unw_get_pr (&prev_info, &pr) < 0) { - unw_get_rp(&prev_info, &ip); - dprintk("ptrace.%s: failed to read " - "predicate register (ip=0x%lx)\n", - __func__, ip); - return; - } - if (unw_is_intr_frame(&info) - && (pr & (1UL << PRED_USER_STACK))) - break; - } - - /* - * Note: at the time of this call, the target task is blocked - * in notify_resume_user() and by clearling PRED_LEAVE_SYSCALL - * (aka, "pLvSys") we redirect execution from - * .work_pending_syscall_end to .work_processed_kernel. 
- */ - unw_get_pr(&prev_info, &pr); - pr &= ~((1UL << PRED_SYSCALL) | (1UL << PRED_LEAVE_SYSCALL)); - pr |= (1UL << PRED_NON_SYSCALL); - unw_set_pr(&prev_info, pr); - - pt->cr_ifs = (1UL << 63) | cfm; - /* - * Clear the memory that is NOT written on syscall-entry to - * ensure we do not leak kernel-state to user when execution - * resumes. - */ - pt->r2 = 0; - pt->r3 = 0; - pt->r14 = 0; - memset(&pt->r16, 0, 16*8); /* clear r16-r31 */ - memset(&pt->f6, 0, 6*16); /* clear f6-f11 */ - pt->b7 = 0; - pt->ar_ccv = 0; - pt->ar_csd = 0; - pt->ar_ssd = 0; -} - -static int -access_nat_bits (struct task_struct *child, struct pt_regs *pt, - struct unw_frame_info *info, - unsigned long *data, int write_access) -{ - unsigned long regnum, nat_bits, scratch_unat, dummy = 0; - char nat = 0; - - if (write_access) { - nat_bits = *data; - scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits); - if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) { - dprintk("ptrace: failed to set ar.unat\n"); - return -1; - } - for (regnum = 4; regnum <= 7; ++regnum) { - unw_get_gr(info, regnum, &dummy, &nat); - unw_set_gr(info, regnum, dummy, - (nat_bits >> regnum) & 1); - } - } else { - if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) { - dprintk("ptrace: failed to read ar.unat\n"); - return -1; - } - nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat); - for (regnum = 4; regnum <= 7; ++regnum) { - unw_get_gr(info, regnum, &dummy, &nat); - nat_bits |= (nat != 0) << regnum; - } - *data = nat_bits; - } - return 0; -} - -static int -access_elf_reg(struct task_struct *target, struct unw_frame_info *info, - unsigned long addr, unsigned long *data, int write_access); - -static long -ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) -{ - unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val; - struct unw_frame_info info; - struct ia64_fpreg fpval; - struct switch_stack *sw; - struct pt_regs *pt; - long ret, retval = 0; - char nat = 0; - int i; - - if 
(!access_ok(ppr, sizeof(struct pt_all_user_regs))) - return -EIO; - - pt = task_pt_regs(child); - sw = (struct switch_stack *) (child->thread.ksp + 16); - unw_init_from_blocked_task(&info, child); - if (unw_unwind_to_user(&info) < 0) { - return -EIO; - } - - if (((unsigned long) ppr & 0x7) != 0) { - dprintk("ptrace:unaligned register address %p\n", ppr); - return -EIO; - } - - if (access_elf_reg(child, &info, ELF_CR_IPSR_OFFSET, &psr, 0) < 0 || - access_elf_reg(child, &info, ELF_AR_EC_OFFSET, &ec, 0) < 0 || - access_elf_reg(child, &info, ELF_AR_LC_OFFSET, &lc, 0) < 0 || - access_elf_reg(child, &info, ELF_AR_RNAT_OFFSET, &rnat, 0) < 0 || - access_elf_reg(child, &info, ELF_AR_BSP_OFFSET, &bsp, 0) < 0 || - access_elf_reg(child, &info, ELF_CFM_OFFSET, &cfm, 0) < 0 || - access_elf_reg(child, &info, ELF_NAT_OFFSET, &nat_bits, 0) < 0) - return -EIO; - - /* control regs */ - - retval |= __put_user(pt->cr_iip, &ppr->cr_iip); - retval |= __put_user(psr, &ppr->cr_ipsr); - - /* app regs */ - - retval |= __put_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); - retval |= __put_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]); - retval |= __put_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); - retval |= __put_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); - retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); - retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); - - retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]); - retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]); - retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]); - retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]); - retval |= __put_user(cfm, &ppr->cfm); - - /* gr1-gr3 */ - - retval |= __copy_to_user(&ppr->gr[1], &pt->r1, sizeof(long)); - retval |= __copy_to_user(&ppr->gr[2], &pt->r2, sizeof(long) *2); - - /* gr4-gr7 */ - - for (i = 4; i < 8; i++) { - if (unw_access_gr(&info, i, &val, &nat, 0) < 0) - return -EIO; - retval |= __put_user(val, &ppr->gr[i]); - } - - /* gr8-gr11 */ - - retval |= __copy_to_user(&ppr->gr[8], &pt->r8, sizeof(long) * 4); 
- - /* gr12-gr15 */ - - retval |= __copy_to_user(&ppr->gr[12], &pt->r12, sizeof(long) * 2); - retval |= __copy_to_user(&ppr->gr[14], &pt->r14, sizeof(long)); - retval |= __copy_to_user(&ppr->gr[15], &pt->r15, sizeof(long)); - - /* gr16-gr31 */ - - retval |= __copy_to_user(&ppr->gr[16], &pt->r16, sizeof(long) * 16); - - /* b0 */ - - retval |= __put_user(pt->b0, &ppr->br[0]); - - /* b1-b5 */ - - for (i = 1; i < 6; i++) { - if (unw_access_br(&info, i, &val, 0) < 0) - return -EIO; - __put_user(val, &ppr->br[i]); - } - - /* b6-b7 */ - - retval |= __put_user(pt->b6, &ppr->br[6]); - retval |= __put_user(pt->b7, &ppr->br[7]); - - /* fr2-fr5 */ - - for (i = 2; i < 6; i++) { - if (unw_get_fr(&info, i, &fpval) < 0) - return -EIO; - retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); - } - - /* fr6-fr11 */ - - retval |= __copy_to_user(&ppr->fr[6], &pt->f6, - sizeof(struct ia64_fpreg) * 6); - - /* fp scratch regs(12-15) */ - - retval |= __copy_to_user(&ppr->fr[12], &sw->f12, - sizeof(struct ia64_fpreg) * 4); - - /* fr16-fr31 */ - - for (i = 16; i < 32; i++) { - if (unw_get_fr(&info, i, &fpval) < 0) - return -EIO; - retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); - } - - /* fph */ - - ia64_flush_fph(child); - retval |= __copy_to_user(&ppr->fr[32], &child->thread.fph, - sizeof(ppr->fr[32]) * 96); - - /* preds */ - - retval |= __put_user(pt->pr, &ppr->pr); - - /* nat bits */ - - retval |= __put_user(nat_bits, &ppr->nat); - - ret = retval ? 
-EIO : 0; - return ret; -} - -static long -ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) -{ - unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0; - struct unw_frame_info info; - struct switch_stack *sw; - struct ia64_fpreg fpval; - struct pt_regs *pt; - long retval = 0; - int i; - - memset(&fpval, 0, sizeof(fpval)); - - if (!access_ok(ppr, sizeof(struct pt_all_user_regs))) - return -EIO; - - pt = task_pt_regs(child); - sw = (struct switch_stack *) (child->thread.ksp + 16); - unw_init_from_blocked_task(&info, child); - if (unw_unwind_to_user(&info) < 0) { - return -EIO; - } - - if (((unsigned long) ppr & 0x7) != 0) { - dprintk("ptrace:unaligned register address %p\n", ppr); - return -EIO; - } - - /* control regs */ - - retval |= __get_user(pt->cr_iip, &ppr->cr_iip); - retval |= __get_user(psr, &ppr->cr_ipsr); - - /* app regs */ - - retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); - retval |= __get_user(rsc, &ppr->ar[PT_AUR_RSC]); - retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); - retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); - retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); - retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); - - retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]); - retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]); - retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]); - retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]); - retval |= __get_user(cfm, &ppr->cfm); - - /* gr1-gr3 */ - - retval |= __copy_from_user(&pt->r1, &ppr->gr[1], sizeof(long)); - retval |= __copy_from_user(&pt->r2, &ppr->gr[2], sizeof(long) * 2); - - /* gr4-gr7 */ - - for (i = 4; i < 8; i++) { - retval |= __get_user(val, &ppr->gr[i]); - /* NaT bit will be set via PT_NAT_BITS: */ - if (unw_set_gr(&info, i, val, 0) < 0) - return -EIO; - } - - /* gr8-gr11 */ - - retval |= __copy_from_user(&pt->r8, &ppr->gr[8], sizeof(long) * 4); - - /* gr12-gr15 */ - - retval |= __copy_from_user(&pt->r12, 
&ppr->gr[12], sizeof(long) * 2); - retval |= __copy_from_user(&pt->r14, &ppr->gr[14], sizeof(long)); - retval |= __copy_from_user(&pt->r15, &ppr->gr[15], sizeof(long)); - - /* gr16-gr31 */ - - retval |= __copy_from_user(&pt->r16, &ppr->gr[16], sizeof(long) * 16); - - /* b0 */ - - retval |= __get_user(pt->b0, &ppr->br[0]); - - /* b1-b5 */ - - for (i = 1; i < 6; i++) { - retval |= __get_user(val, &ppr->br[i]); - unw_set_br(&info, i, val); - } - - /* b6-b7 */ - - retval |= __get_user(pt->b6, &ppr->br[6]); - retval |= __get_user(pt->b7, &ppr->br[7]); - - /* fr2-fr5 */ - - for (i = 2; i < 6; i++) { - retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval)); - if (unw_set_fr(&info, i, fpval) < 0) - return -EIO; - } - - /* fr6-fr11 */ - - retval |= __copy_from_user(&pt->f6, &ppr->fr[6], - sizeof(ppr->fr[6]) * 6); - - /* fp scratch regs(12-15) */ - - retval |= __copy_from_user(&sw->f12, &ppr->fr[12], - sizeof(ppr->fr[12]) * 4); - - /* fr16-fr31 */ - - for (i = 16; i < 32; i++) { - retval |= __copy_from_user(&fpval, &ppr->fr[i], - sizeof(fpval)); - if (unw_set_fr(&info, i, fpval) < 0) - return -EIO; - } - - /* fph */ - - ia64_sync_fph(child); - retval |= __copy_from_user(&child->thread.fph, &ppr->fr[32], - sizeof(ppr->fr[32]) * 96); - - /* preds */ - - retval |= __get_user(pt->pr, &ppr->pr); - - /* nat bits */ - - retval |= __get_user(nat_bits, &ppr->nat); - - retval |= access_elf_reg(child, &info, ELF_CR_IPSR_OFFSET, &psr, 1); - retval |= access_elf_reg(child, &info, ELF_AR_RSC_OFFSET, &rsc, 1); - retval |= access_elf_reg(child, &info, ELF_AR_EC_OFFSET, &ec, 1); - retval |= access_elf_reg(child, &info, ELF_AR_LC_OFFSET, &lc, 1); - retval |= access_elf_reg(child, &info, ELF_AR_RNAT_OFFSET, &rnat, 1); - retval |= access_elf_reg(child, &info, ELF_AR_BSP_OFFSET, &bsp, 1); - retval |= access_elf_reg(child, &info, ELF_CFM_OFFSET, &cfm, 1); - retval |= access_elf_reg(child, &info, ELF_NAT_OFFSET, &nat_bits, 1); - - return retval ? 
-EIO : 0; -} - -void -user_enable_single_step (struct task_struct *child) -{ - struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); - - set_tsk_thread_flag(child, TIF_SINGLESTEP); - child_psr->ss = 1; -} - -void -user_enable_block_step (struct task_struct *child) -{ - struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); - - set_tsk_thread_flag(child, TIF_SINGLESTEP); - child_psr->tb = 1; -} - -void -user_disable_single_step (struct task_struct *child) -{ - struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); - - /* make sure the single step/taken-branch trap bits are not set: */ - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - child_psr->ss = 0; - child_psr->tb = 0; -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. - */ -void -ptrace_disable (struct task_struct *child) -{ - user_disable_single_step(child); -} - -static int -access_uarea (struct task_struct *child, unsigned long addr, - unsigned long *data, int write_access); - -long -arch_ptrace (struct task_struct *child, long request, - unsigned long addr, unsigned long data) -{ - switch (request) { - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - /* read word at location addr */ - if (ptrace_access_vm(child, addr, &data, sizeof(data), - FOLL_FORCE) - != sizeof(data)) - return -EIO; - /* ensure return value is not mistaken for error code */ - force_successful_syscall_return(); - return data; - - /* PTRACE_POKETEXT and PTRACE_POKEDATA is handled - * by the generic ptrace_request(). 
- */ - - case PTRACE_PEEKUSR: - /* read the word at addr in the USER area */ - if (access_uarea(child, addr, &data, 0) < 0) - return -EIO; - /* ensure return value is not mistaken for error code */ - force_successful_syscall_return(); - return data; - - case PTRACE_POKEUSR: - /* write the word at addr in the USER area */ - if (access_uarea(child, addr, &data, 1) < 0) - return -EIO; - return 0; - - case PTRACE_OLD_GETSIGINFO: - /* for backwards-compatibility */ - return ptrace_request(child, PTRACE_GETSIGINFO, addr, data); - - case PTRACE_OLD_SETSIGINFO: - /* for backwards-compatibility */ - return ptrace_request(child, PTRACE_SETSIGINFO, addr, data); - - case PTRACE_GETREGS: - return ptrace_getregs(child, - (struct pt_all_user_regs __user *) data); - - case PTRACE_SETREGS: - return ptrace_setregs(child, - (struct pt_all_user_regs __user *) data); - - default: - return ptrace_request(child, request, addr, data); - } -} - - -/* "asmlinkage" so the input arguments are preserved... */ - -asmlinkage long -syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, - long arg4, long arg5, long arg6, long arg7, - struct pt_regs regs) -{ - if (test_thread_flag(TIF_SYSCALL_TRACE)) - if (ptrace_report_syscall_entry(®s)) - return -ENOSYS; - - /* copy user rbs to kernel rbs */ - if (test_thread_flag(TIF_RESTORE_RSE)) - ia64_sync_krbs(); - - - audit_syscall_entry(regs.r15, arg0, arg1, arg2, arg3); - - return 0; -} - -/* "asmlinkage" so the input arguments are preserved... 
*/ - -asmlinkage void -syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, - long arg4, long arg5, long arg6, long arg7, - struct pt_regs regs) -{ - int step; - - audit_syscall_exit(®s); - - step = test_thread_flag(TIF_SINGLESTEP); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - ptrace_report_syscall_exit(®s, step); - - /* copy user rbs to kernel rbs */ - if (test_thread_flag(TIF_RESTORE_RSE)) - ia64_sync_krbs(); -} - -/* Utrace implementation starts here */ -struct regset_get { - void *kbuf; - void __user *ubuf; -}; - -struct regset_set { - const void *kbuf; - const void __user *ubuf; -}; - -struct regset_getset { - struct task_struct *target; - const struct user_regset *regset; - union { - struct regset_get get; - struct regset_set set; - } u; - unsigned int pos; - unsigned int count; - int ret; -}; - -static const ptrdiff_t pt_offsets[32] = -{ -#define R(n) offsetof(struct pt_regs, r##n) - [0] = -1, R(1), R(2), R(3), - [4] = -1, [5] = -1, [6] = -1, [7] = -1, - R(8), R(9), R(10), R(11), R(12), R(13), R(14), R(15), - R(16), R(17), R(18), R(19), R(20), R(21), R(22), R(23), - R(24), R(25), R(26), R(27), R(28), R(29), R(30), R(31), -#undef R -}; - -static int -access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info, - unsigned long addr, unsigned long *data, int write_access) -{ - struct pt_regs *pt = task_pt_regs(target); - unsigned reg = addr / sizeof(unsigned long); - ptrdiff_t d = pt_offsets[reg]; - - if (d >= 0) { - unsigned long *ptr = (void *)pt + d; - if (write_access) - *ptr = *data; - else - *data = *ptr; - return 0; - } else { - char nat = 0; - if (write_access) { - /* read NaT bit first: */ - unsigned long dummy; - int ret = unw_get_gr(info, reg, &dummy, &nat); - if (ret < 0) - return ret; - } - return unw_access_gr(info, reg, data, &nat, write_access); - } -} - -static int -access_elf_breg(struct task_struct *target, struct unw_frame_info *info, - unsigned long addr, unsigned long *data, int write_access) -{ - struct 
pt_regs *pt; - unsigned long *ptr = NULL; - - pt = task_pt_regs(target); - switch (addr) { - case ELF_BR_OFFSET(0): - ptr = &pt->b0; - break; - case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5): - return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8, - data, write_access); - case ELF_BR_OFFSET(6): - ptr = &pt->b6; - break; - case ELF_BR_OFFSET(7): - ptr = &pt->b7; - } - if (write_access) - *ptr = *data; - else - *data = *ptr; - return 0; -} - -static int -access_elf_areg(struct task_struct *target, struct unw_frame_info *info, - unsigned long addr, unsigned long *data, int write_access) -{ - struct pt_regs *pt; - unsigned long cfm, urbs_end; - unsigned long *ptr = NULL; - - pt = task_pt_regs(target); - if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) { - switch (addr) { - case ELF_AR_RSC_OFFSET: - /* force PL3 */ - if (write_access) - pt->ar_rsc = *data | (3 << 2); - else - *data = pt->ar_rsc; - return 0; - case ELF_AR_BSP_OFFSET: - /* - * By convention, we use PT_AR_BSP to refer to - * the end of the user-level backing store. - * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof) - * to get the real value of ar.bsp at the time - * the kernel was entered. - * - * Furthermore, when changing the contents of - * PT_AR_BSP (or PT_CFM) while the task is - * blocked in a system call, convert the state - * so that the non-system-call exit - * path is used. This ensures that the proper - * state will be picked up when resuming - * execution. However, it *also* means that - * once we write PT_AR_BSP/PT_CFM, it won't be - * possible to modify the syscall arguments of - * the pending system call any longer. This - * shouldn't be an issue because modifying - * PT_AR_BSP/PT_CFM generally implies that - * we're either abandoning the pending system - * call or that we defer it's re-execution - * (e.g., due to GDB doing an inferior - * function call). 
- */ - urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); - if (write_access) { - if (*data != urbs_end) { - if (in_syscall(pt)) - convert_to_non_syscall(target, - pt, - cfm); - /* - * Simulate user-level write - * of ar.bsp: - */ - pt->loadrs = 0; - pt->ar_bspstore = *data; - } - } else - *data = urbs_end; - return 0; - case ELF_AR_BSPSTORE_OFFSET: - ptr = &pt->ar_bspstore; - break; - case ELF_AR_RNAT_OFFSET: - ptr = &pt->ar_rnat; - break; - case ELF_AR_CCV_OFFSET: - ptr = &pt->ar_ccv; - break; - case ELF_AR_UNAT_OFFSET: - ptr = &pt->ar_unat; - break; - case ELF_AR_FPSR_OFFSET: - ptr = &pt->ar_fpsr; - break; - case ELF_AR_PFS_OFFSET: - ptr = &pt->ar_pfs; - break; - case ELF_AR_LC_OFFSET: - return unw_access_ar(info, UNW_AR_LC, data, - write_access); - case ELF_AR_EC_OFFSET: - return unw_access_ar(info, UNW_AR_EC, data, - write_access); - case ELF_AR_CSD_OFFSET: - ptr = &pt->ar_csd; - break; - case ELF_AR_SSD_OFFSET: - ptr = &pt->ar_ssd; - } - } else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) { - switch (addr) { - case ELF_CR_IIP_OFFSET: - ptr = &pt->cr_iip; - break; - case ELF_CFM_OFFSET: - urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); - if (write_access) { - if (((cfm ^ *data) & PFM_MASK) != 0) { - if (in_syscall(pt)) - convert_to_non_syscall(target, - pt, - cfm); - pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK) - | (*data & PFM_MASK)); - } - } else - *data = cfm; - return 0; - case ELF_CR_IPSR_OFFSET: - if (write_access) { - unsigned long tmp = *data; - /* psr.ri==3 is a reserved value: SDM 2:25 */ - if ((tmp & IA64_PSR_RI) == IA64_PSR_RI) - tmp &= ~IA64_PSR_RI; - pt->cr_ipsr = ((tmp & IPSR_MASK) - | (pt->cr_ipsr & ~IPSR_MASK)); - } else - *data = (pt->cr_ipsr & IPSR_MASK); - return 0; - } - } else if (addr == ELF_NAT_OFFSET) - return access_nat_bits(target, pt, info, - data, write_access); - else if (addr == ELF_PR_OFFSET) - ptr = &pt->pr; - else - return -1; - - if (write_access) - *ptr = *data; - else - *data = *ptr; - - return 0; -} - 
-static int -access_elf_reg(struct task_struct *target, struct unw_frame_info *info, - unsigned long addr, unsigned long *data, int write_access) -{ - if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(31)) - return access_elf_gpreg(target, info, addr, data, write_access); - else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7)) - return access_elf_breg(target, info, addr, data, write_access); - else - return access_elf_areg(target, info, addr, data, write_access); -} - -struct regset_membuf { - struct membuf to; - int ret; -}; - -static void do_gpregs_get(struct unw_frame_info *info, void *arg) -{ - struct regset_membuf *dst = arg; - struct membuf to = dst->to; - unsigned int n; - elf_greg_t reg; - - if (unw_unwind_to_user(info) < 0) - return; - - /* - * coredump format: - * r0-r31 - * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) - * predicate registers (p0-p63) - * b0-b7 - * ip cfm user-mask - * ar.rsc ar.bsp ar.bspstore ar.rnat - * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec - */ - - - /* Skip r0 */ - membuf_zero(&to, 8); - for (n = 8; to.left && n < ELF_AR_END_OFFSET; n += 8) { - if (access_elf_reg(info->task, info, n, ®, 0) < 0) { - dst->ret = -EIO; - return; - } - membuf_store(&to, reg); - } -} - -static void do_gpregs_set(struct unw_frame_info *info, void *arg) -{ - struct regset_getset *dst = arg; - - if (unw_unwind_to_user(info) < 0) - return; - - if (!dst->count) - return; - /* Skip r0 */ - if (dst->pos < ELF_GR_OFFSET(1)) { - user_regset_copyin_ignore(&dst->pos, &dst->count, - &dst->u.set.kbuf, &dst->u.set.ubuf, - 0, ELF_GR_OFFSET(1)); - dst->ret = 0; - } - - while (dst->count && dst->pos < ELF_AR_END_OFFSET) { - unsigned int n, from, to; - elf_greg_t tmp[16]; - - from = dst->pos; - to = from + sizeof(tmp); - if (to > ELF_AR_END_OFFSET) - to = ELF_AR_END_OFFSET; - /* get up to 16 values */ - dst->ret = user_regset_copyin(&dst->pos, &dst->count, - &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, - from, to); - if (dst->ret) - return; - /* now copy 
them into registers */ - for (n = 0; from < dst->pos; from += sizeof(elf_greg_t), n++) - if (access_elf_reg(dst->target, info, from, - &tmp[n], 1) < 0) { - dst->ret = -EIO; - return; - } - } -} - -#define ELF_FP_OFFSET(i) (i * sizeof(elf_fpreg_t)) - -static void do_fpregs_get(struct unw_frame_info *info, void *arg) -{ - struct task_struct *task = info->task; - struct regset_membuf *dst = arg; - struct membuf to = dst->to; - elf_fpreg_t reg; - unsigned int n; - - if (unw_unwind_to_user(info) < 0) - return; - - /* Skip pos 0 and 1 */ - membuf_zero(&to, 2 * sizeof(elf_fpreg_t)); - - /* fr2-fr31 */ - for (n = 2; to.left && n < 32; n++) { - if (unw_get_fr(info, n, ®)) { - dst->ret = -EIO; - return; - } - membuf_write(&to, ®, sizeof(reg)); - } - - /* fph */ - if (!to.left) - return; - - ia64_flush_fph(task); - if (task->thread.flags & IA64_THREAD_FPH_VALID) - membuf_write(&to, &task->thread.fph, 96 * sizeof(reg)); - else - membuf_zero(&to, 96 * sizeof(reg)); -} - -static void do_fpregs_set(struct unw_frame_info *info, void *arg) -{ - struct regset_getset *dst = arg; - elf_fpreg_t fpreg, tmp[30]; - int index, start, end; - - if (unw_unwind_to_user(info) < 0) - return; - - /* Skip pos 0 and 1 */ - if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) { - user_regset_copyin_ignore(&dst->pos, &dst->count, - &dst->u.set.kbuf, &dst->u.set.ubuf, - 0, ELF_FP_OFFSET(2)); - dst->ret = 0; - if (dst->count == 0) - return; - } - - /* fr2-fr31 */ - if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) { - start = dst->pos; - end = min(((unsigned int)ELF_FP_OFFSET(32)), - dst->pos + dst->count); - dst->ret = user_regset_copyin(&dst->pos, &dst->count, - &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, - ELF_FP_OFFSET(2), ELF_FP_OFFSET(32)); - if (dst->ret) - return; - - if (start & 0xF) { /* only write high part */ - if (unw_get_fr(info, start / sizeof(elf_fpreg_t), - &fpreg)) { - dst->ret = -EIO; - return; - } - tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0] - = fpreg.u.bits[0]; - start &= ~0xFUL; 
- } - if (end & 0xF) { /* only write low part */ - if (unw_get_fr(info, end / sizeof(elf_fpreg_t), - &fpreg)) { - dst->ret = -EIO; - return; - } - tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1] - = fpreg.u.bits[1]; - end = (end + 0xF) & ~0xFUL; - } - - for ( ; start < end ; start += sizeof(elf_fpreg_t)) { - index = start / sizeof(elf_fpreg_t); - if (unw_set_fr(info, index, tmp[index - 2])) { - dst->ret = -EIO; - return; - } - } - if (dst->ret || dst->count == 0) - return; - } - - /* fph */ - if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) { - ia64_sync_fph(dst->target); - dst->ret = user_regset_copyin(&dst->pos, &dst->count, - &dst->u.set.kbuf, - &dst->u.set.ubuf, - &dst->target->thread.fph, - ELF_FP_OFFSET(32), -1); - } -} - -static void -unwind_and_call(void (*call)(struct unw_frame_info *, void *), - struct task_struct *target, void *data) -{ - if (target == current) - unw_init_running(call, data); - else { - struct unw_frame_info info; - memset(&info, 0, sizeof(info)); - unw_init_from_blocked_task(&info, target); - (*call)(&info, data); - } -} - -static int -do_regset_call(void (*call)(struct unw_frame_info *, void *), - struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct regset_getset info = { .target = target, .regset = regset, - .pos = pos, .count = count, - .u.set = { .kbuf = kbuf, .ubuf = ubuf }, - .ret = 0 }; - unwind_and_call(call, target, &info); - return info.ret; -} - -static int -gpregs_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - struct regset_membuf info = {.to = to}; - unwind_and_call(do_gpregs_get, target, &info); - return info.ret; -} - -static int gpregs_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return do_regset_call(do_gpregs_set, target, regset, pos, count, - kbuf, ubuf); 
-} - -static void do_gpregs_writeback(struct unw_frame_info *info, void *arg) -{ - do_sync_rbs(info, ia64_sync_user_rbs); -} - -/* - * This is called to write back the register backing store. - * ptrace does this before it stops, so that a tracer reading the user - * memory after the thread stops will get the current register data. - */ -static int -gpregs_writeback(struct task_struct *target, - const struct user_regset *regset, - int now) -{ - if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE)) - return 0; - set_notify_resume(target); - return do_regset_call(do_gpregs_writeback, target, regset, 0, 0, - NULL, NULL); -} - -static int -fpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32; -} - -static int fpregs_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - struct regset_membuf info = {.to = to}; - unwind_and_call(do_fpregs_get, target, &info); - return info.ret; -} - -static int fpregs_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - return do_regset_call(do_fpregs_set, target, regset, pos, count, - kbuf, ubuf); -} - -static int -access_uarea(struct task_struct *child, unsigned long addr, - unsigned long *data, int write_access) -{ - unsigned int pos = -1; /* an invalid value */ - unsigned long *ptr, regnum; - - if ((addr & 0x7) != 0) { - dprintk("ptrace: unaligned register address 0x%lx\n", addr); - return -1; - } - if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) || - (addr >= PT_R7 + 8 && addr < PT_B1) || - (addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) || - (addr >= PT_AR_SSD + 8 && addr < PT_DBR)) { - dprintk("ptrace: rejecting access to register " - "address 0x%lx\n", addr); - return -1; - } - - switch (addr) { - case PT_F32 ... (PT_F127 + 15): - pos = addr - PT_F32 + ELF_FP_OFFSET(32); - break; - case PT_F2 ... 
(PT_F5 + 15): - pos = addr - PT_F2 + ELF_FP_OFFSET(2); - break; - case PT_F10 ... (PT_F31 + 15): - pos = addr - PT_F10 + ELF_FP_OFFSET(10); - break; - case PT_F6 ... (PT_F9 + 15): - pos = addr - PT_F6 + ELF_FP_OFFSET(6); - break; - } - - if (pos != -1) { - unsigned reg = pos / sizeof(elf_fpreg_t); - int which_half = (pos / sizeof(unsigned long)) & 1; - - if (reg < 32) { /* fr2-fr31 */ - struct unw_frame_info info; - elf_fpreg_t fpreg; - - memset(&info, 0, sizeof(info)); - unw_init_from_blocked_task(&info, child); - if (unw_unwind_to_user(&info) < 0) - return 0; - - if (unw_get_fr(&info, reg, &fpreg)) - return -1; - if (write_access) { - fpreg.u.bits[which_half] = *data; - if (unw_set_fr(&info, reg, fpreg)) - return -1; - } else { - *data = fpreg.u.bits[which_half]; - } - } else { /* fph */ - elf_fpreg_t *p = &child->thread.fph[reg - 32]; - unsigned long *bits = &p->u.bits[which_half]; - - ia64_sync_fph(child); - if (write_access) - *bits = *data; - else if (child->thread.flags & IA64_THREAD_FPH_VALID) - *data = *bits; - else - *data = 0; - } - return 0; - } - - switch (addr) { - case PT_NAT_BITS: - pos = ELF_NAT_OFFSET; - break; - case PT_R4 ... PT_R7: - pos = addr - PT_R4 + ELF_GR_OFFSET(4); - break; - case PT_B1 ... 
PT_B5: - pos = addr - PT_B1 + ELF_BR_OFFSET(1); - break; - case PT_AR_EC: - pos = ELF_AR_EC_OFFSET; - break; - case PT_AR_LC: - pos = ELF_AR_LC_OFFSET; - break; - case PT_CR_IPSR: - pos = ELF_CR_IPSR_OFFSET; - break; - case PT_CR_IIP: - pos = ELF_CR_IIP_OFFSET; - break; - case PT_CFM: - pos = ELF_CFM_OFFSET; - break; - case PT_AR_UNAT: - pos = ELF_AR_UNAT_OFFSET; - break; - case PT_AR_PFS: - pos = ELF_AR_PFS_OFFSET; - break; - case PT_AR_RSC: - pos = ELF_AR_RSC_OFFSET; - break; - case PT_AR_RNAT: - pos = ELF_AR_RNAT_OFFSET; - break; - case PT_AR_BSPSTORE: - pos = ELF_AR_BSPSTORE_OFFSET; - break; - case PT_PR: - pos = ELF_PR_OFFSET; - break; - case PT_B6: - pos = ELF_BR_OFFSET(6); - break; - case PT_AR_BSP: - pos = ELF_AR_BSP_OFFSET; - break; - case PT_R1 ... PT_R3: - pos = addr - PT_R1 + ELF_GR_OFFSET(1); - break; - case PT_R12 ... PT_R15: - pos = addr - PT_R12 + ELF_GR_OFFSET(12); - break; - case PT_R8 ... PT_R11: - pos = addr - PT_R8 + ELF_GR_OFFSET(8); - break; - case PT_R16 ... PT_R31: - pos = addr - PT_R16 + ELF_GR_OFFSET(16); - break; - case PT_AR_CCV: - pos = ELF_AR_CCV_OFFSET; - break; - case PT_AR_FPSR: - pos = ELF_AR_FPSR_OFFSET; - break; - case PT_B0: - pos = ELF_BR_OFFSET(0); - break; - case PT_B7: - pos = ELF_BR_OFFSET(7); - break; - case PT_AR_CSD: - pos = ELF_AR_CSD_OFFSET; - break; - case PT_AR_SSD: - pos = ELF_AR_SSD_OFFSET; - break; - } - - if (pos != -1) { - struct unw_frame_info info; - - memset(&info, 0, sizeof(info)); - unw_init_from_blocked_task(&info, child); - if (unw_unwind_to_user(&info) < 0) - return 0; - - return access_elf_reg(child, &info, pos, data, write_access); - } - - /* access debug registers */ - if (addr >= PT_IBR) { - regnum = (addr - PT_IBR) >> 3; - ptr = &child->thread.ibr[0]; - } else { - regnum = (addr - PT_DBR) >> 3; - ptr = &child->thread.dbr[0]; - } - - if (regnum >= 8) { - dprintk("ptrace: rejecting access to register " - "address 0x%lx\n", addr); - return -1; - } - - if (!(child->thread.flags & 
IA64_THREAD_DBG_VALID)) { - child->thread.flags |= IA64_THREAD_DBG_VALID; - memset(child->thread.dbr, 0, - sizeof(child->thread.dbr)); - memset(child->thread.ibr, 0, - sizeof(child->thread.ibr)); - } - - ptr += regnum; - - if ((regnum & 1) && write_access) { - /* don't let the user set kernel-level breakpoints: */ - *ptr = *data & ~(7UL << 56); - return 0; - } - if (write_access) - *ptr = *data; - else - *data = *ptr; - return 0; -} - -static const struct user_regset native_regsets[] = { - { - .core_note_type = NT_PRSTATUS, - .n = ELF_NGREG, - .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t), - .regset_get = gpregs_get, .set = gpregs_set, - .writeback = gpregs_writeback - }, - { - .core_note_type = NT_PRFPREG, - .n = ELF_NFPREG, - .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), - .regset_get = fpregs_get, .set = fpregs_set, .active = fpregs_active - }, -}; - -static const struct user_regset_view user_ia64_view = { - .name = "ia64", - .e_machine = EM_IA_64, - .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) -}; - -const struct user_regset_view *task_user_regset_view(struct task_struct *tsk) -{ - return &user_ia64_view; -} - -struct syscall_get_args { - unsigned int i; - unsigned int n; - unsigned long *args; - struct pt_regs *regs; -}; - -static void syscall_get_args_cb(struct unw_frame_info *info, void *data) -{ - struct syscall_get_args *args = data; - struct pt_regs *pt = args->regs; - unsigned long *krbs, cfm, ndirty, nlocals, nouts; - int i, count; - - if (unw_unwind_to_user(info) < 0) - return; - - /* - * We get here via a few paths: - * - break instruction: cfm is shared with caller. - * syscall args are in out= regs, locals are non-empty. - * - epsinstruction: cfm is set by br.call - * locals don't exist. - * - * For both cases arguments are reachable in cfm.sof - cfm.sol. - * CFM: [ ... 
| sor: 17..14 | sol : 13..7 | sof : 6..0 ] - */ - cfm = pt->cr_ifs; - nlocals = (cfm >> 7) & 0x7f; /* aka sol */ - nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */ - krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; - ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); - - count = 0; - if (in_syscall(pt)) - count = min_t(int, args->n, nouts); - - /* Iterate over outs. */ - for (i = 0; i < count; i++) { - int j = ndirty + nlocals + i + args->i; - args->args[i] = *ia64_rse_skip_regs(krbs, j); - } - - while (i < args->n) { - args->args[i] = 0; - i++; - } -} - -void syscall_get_arguments(struct task_struct *task, - struct pt_regs *regs, unsigned long *args) -{ - struct syscall_get_args data = { - .i = 0, - .n = 6, - .args = args, - .regs = regs, - }; - - if (task == current) - unw_init_running(syscall_get_args_cb, &data); - else { - struct unw_frame_info ufi; - memset(&ufi, 0, sizeof(ufi)); - unw_init_from_blocked_task(&ufi, task); - syscall_get_args_cb(&ufi, &data); - } -} diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S deleted file mode 100644 index 527a7b896a6e..000000000000 --- a/arch/ia64/kernel/relocate_kernel.S +++ /dev/null @@ -1,321 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/ia64/kernel/relocate_kernel.S - * - * Relocate kexec'able kernel and start it - * - * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. 
- * Copyright (C) 2005 Khalid Aziz - * Copyright (C) 2005 Intel Corp, Zou Nan hai - */ -#include -#include -#include -#include -#include - - /* Must be relocatable PIC code callable as a C function - */ -GLOBAL_ENTRY(relocate_new_kernel) - .prologue - alloc r31=ar.pfs,4,0,0,0 - .body -.reloc_entry: -{ - rsm psr.i| psr.ic - mov r2=ip -} - ;; -{ - flushrs // must be first insn in group - srlz.i -} - ;; - dep r2=0,r2,61,3 //to physical address - ;; - //first switch to physical mode - add r3=1f-.reloc_entry, r2 - movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC - mov ar.rsc=0 // put RSE in enforced lazy mode - ;; - add sp=(memory_stack_end - 16 - .reloc_entry),r2 - add r8=(register_stack - .reloc_entry),r2 - ;; - mov r18=ar.rnat - mov ar.bspstore=r8 - ;; - mov cr.ipsr=r16 - mov cr.iip=r3 - mov cr.ifs=r0 - srlz.i - ;; - mov ar.rnat=r18 - rfi // note: this unmask MCA/INIT (psr.mc) - ;; -1: - //physical mode code begin - mov b6=in1 - dep r28=0,in2,61,3 //to physical address - - // purge all TC entries -#define O(member) IA64_CPUINFO_##member##_OFFSET - GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 - ;; - addl r17=O(PTCE_STRIDE),r2 - addl r2=O(PTCE_BASE),r2 - ;; - ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base - ld4 r19=[r2],4 // r19=ptce_count[0] - ld4 r21=[r17],4 // r21=ptce_stride[0] - ;; - ld4 r20=[r2] // r20=ptce_count[1] - ld4 r22=[r17] // r22=ptce_stride[1] - mov r24=r0 - ;; - adds r20=-1,r20 - ;; -#undef O -2: - cmp.ltu p6,p7=r24,r19 -(p7) br.cond.dpnt.few 4f - mov ar.lc=r20 -3: - ptc.e r18 - ;; - add r18=r22,r18 - br.cloop.sptk.few 3b - ;; - add r18=r21,r18 - add r24=1,r24 - ;; - br.sptk.few 2b -4: - srlz.i - ;; - // purge TR entry for kernel text and data - movl r16=KERNEL_START - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - ;; - ptr.i r16, r18 - ptr.d r16, r18 - ;; - srlz.i - ;; - - // purge TR entry for pal code - mov r16=in3 - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.i r16,r18 - ;; - srlz.i - ;; - - // purge TR entry for stack - mov 
r16=IA64_KR(CURRENT_STACK) - ;; - shl r16=r16,IA64_GRANULE_SHIFT - movl r19=PAGE_OFFSET - ;; - add r16=r19,r16 - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.i - ;; - - //copy segments - movl r16=PAGE_MASK - mov r30=in0 // in0 is page_list - br.sptk.few .dest_page - ;; -.loop: - ld8 r30=[in0], 8;; -.dest_page: - tbit.z p0, p6=r30, 0;; // 0x1 dest page -(p6) and r17=r30, r16 -(p6) br.cond.sptk.few .loop;; - - tbit.z p0, p6=r30, 1;; // 0x2 indirect page -(p6) and in0=r30, r16 -(p6) br.cond.sptk.few .loop;; - - tbit.z p0, p6=r30, 2;; // 0x4 end flag -(p6) br.cond.sptk.few .end_loop;; - - tbit.z p6, p0=r30, 3;; // 0x8 source page -(p6) br.cond.sptk.few .loop - - and r18=r30, r16 - - // simple copy page, may optimize later - movl r14=PAGE_SIZE/8 - 1;; - mov ar.lc=r14;; -1: - ld8 r14=[r18], 8;; - st8 [r17]=r14;; - fc.i r17 - add r17=8, r17 - br.ctop.sptk.few 1b - br.sptk.few .loop - ;; - -.end_loop: - sync.i // for fc.i - ;; - srlz.i - ;; - srlz.d - ;; - br.call.sptk.many b0=b6;; - -.align 32 -memory_stack: - .fill 8192, 1, 0 -memory_stack_end: -register_stack: - .fill 8192, 1, 0 -register_stack_end: -relocate_new_kernel_end: -END(relocate_new_kernel) - -.global relocate_new_kernel_size -relocate_new_kernel_size: - data8 relocate_new_kernel_end - relocate_new_kernel - -GLOBAL_ENTRY(ia64_dump_cpu_regs) - .prologue - alloc loc0=ar.pfs,1,2,0,0 - .body - mov ar.rsc=0 // put RSE in enforced lazy mode - add loc1=4*8, in0 // save r4 and r5 first - ;; -{ - flushrs // flush dirty regs to backing store - srlz.i -} - st8 [loc1]=r4, 8 - ;; - st8 [loc1]=r5, 8 - ;; - add loc1=32*8, in0 - mov r4=ar.rnat - ;; - st8 [in0]=r0, 8 // r0 - st8 [loc1]=r4, 8 // rnat - mov r5=pr - ;; - st8 [in0]=r1, 8 // r1 - st8 [loc1]=r5, 8 // pr - mov r4=b0 - ;; - st8 [in0]=r2, 8 // r2 - st8 [loc1]=r4, 8 // b0 - mov r5=b1; - ;; - st8 [in0]=r3, 24 // r3 - st8 [loc1]=r5, 8 // b1 - mov r4=b2 - ;; - st8 [in0]=r6, 8 // r6 - st8 [loc1]=r4, 8 // b2 - mov r5=b3 - ;; - st8 [in0]=r7, 8 // r7 - st8 
[loc1]=r5, 8 // b3 - mov r4=b4 - ;; - st8 [in0]=r8, 8 // r8 - st8 [loc1]=r4, 8 // b4 - mov r5=b5 - ;; - st8 [in0]=r9, 8 // r9 - st8 [loc1]=r5, 8 // b5 - mov r4=b6 - ;; - st8 [in0]=r10, 8 // r10 - st8 [loc1]=r5, 8 // b6 - mov r5=b7 - ;; - st8 [in0]=r11, 8 // r11 - st8 [loc1]=r5, 8 // b7 - mov r4=b0 - ;; - st8 [in0]=r12, 8 // r12 - st8 [loc1]=r4, 8 // ip - mov r5=loc0 - ;; - st8 [in0]=r13, 8 // r13 - extr.u r5=r5, 0, 38 // ar.pfs.pfm - mov r4=r0 // user mask - ;; - st8 [in0]=r14, 8 // r14 - st8 [loc1]=r5, 8 // cfm - ;; - st8 [in0]=r15, 8 // r15 - st8 [loc1]=r4, 8 // user mask - mov r5=ar.rsc - ;; - st8 [in0]=r16, 8 // r16 - st8 [loc1]=r5, 8 // ar.rsc - mov r4=ar.bsp - ;; - st8 [in0]=r17, 8 // r17 - st8 [loc1]=r4, 8 // ar.bsp - mov r5=ar.bspstore - ;; - st8 [in0]=r18, 8 // r18 - st8 [loc1]=r5, 8 // ar.bspstore - mov r4=ar.rnat - ;; - st8 [in0]=r19, 8 // r19 - st8 [loc1]=r4, 8 // ar.rnat - mov r5=ar.ccv - ;; - st8 [in0]=r20, 8 // r20 - st8 [loc1]=r5, 8 // ar.ccv - mov r4=ar.unat - ;; - st8 [in0]=r21, 8 // r21 - st8 [loc1]=r4, 8 // ar.unat - mov r5 = ar.fpsr - ;; - st8 [in0]=r22, 8 // r22 - st8 [loc1]=r5, 8 // ar.fpsr - mov r4 = ar.unat - ;; - st8 [in0]=r23, 8 // r23 - st8 [loc1]=r4, 8 // unat - mov r5 = ar.fpsr - ;; - st8 [in0]=r24, 8 // r24 - st8 [loc1]=r5, 8 // fpsr - mov r4 = ar.pfs - ;; - st8 [in0]=r25, 8 // r25 - st8 [loc1]=r4, 8 // ar.pfs - mov r5 = ar.lc - ;; - st8 [in0]=r26, 8 // r26 - st8 [loc1]=r5, 8 // ar.lc - mov r4 = ar.ec - ;; - st8 [in0]=r27, 8 // r27 - st8 [loc1]=r4, 8 // ar.ec - mov r5 = ar.csd - ;; - st8 [in0]=r28, 8 // r28 - st8 [loc1]=r5, 8 // ar.csd - mov r4 = ar.ssd - ;; - st8 [in0]=r29, 8 // r29 - st8 [loc1]=r4, 8 // ar.ssd - ;; - st8 [in0]=r30, 8 // r30 - ;; - st8 [in0]=r31, 8 // r31 - mov ar.pfs=loc0 - ;; - br.ret.sptk.many rp -END(ia64_dump_cpu_regs) diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c deleted file mode 100644 index e4f0705c0282..000000000000 --- a/arch/ia64/kernel/sal.c +++ /dev/null @@ -1,400 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0-only -/* - * System Abstraction Layer (SAL) interface routines. - * - * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - __cacheline_aligned DEFINE_SPINLOCK(sal_lock); -unsigned long sal_platform_features; - -unsigned short sal_revision; -unsigned short sal_version; - -#define SAL_MAJOR(x) ((x) >> 8) -#define SAL_MINOR(x) ((x) & 0xff) - -static struct { - void *addr; /* function entry point */ - void *gpval; /* gp value to use */ -} pdesc; - -static long -default_handler (void) -{ - return -1; -} - -ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler; -ia64_sal_desc_ptc_t *ia64_ptc_domain_info; - -const char * -ia64_sal_strerror (long status) -{ - const char *str; - switch (status) { - case 0: str = "Call completed without error"; break; - case 1: str = "Effect a warm boot of the system to complete " - "the update"; break; - case -1: str = "Not implemented"; break; - case -2: str = "Invalid argument"; break; - case -3: str = "Call completed with error"; break; - case -4: str = "Virtual address not registered"; break; - case -5: str = "No information available"; break; - case -6: str = "Insufficient space to add the entry"; break; - case -7: str = "Invalid entry_addr value"; break; - case -8: str = "Invalid interrupt vector"; break; - case -9: str = "Requested memory not available"; break; - case -10: str = "Unable to write to the NVM device"; break; - case -11: str = "Invalid partition type specified"; break; - case -12: str = "Invalid NVM_Object id specified"; break; - case -13: str = "NVM_Object already has the maximum number " - "of partitions"; break; - case -14: str = "Insufficient space in partition for the " - "requested write sub-function"; break; - case -15: str = "Insufficient data buffer space 
for the " - "requested read record sub-function"; break; - case -16: str = "Scratch buffer required for the write/delete " - "sub-function"; break; - case -17: str = "Insufficient space in the NVM_Object for the " - "requested create sub-function"; break; - case -18: str = "Invalid value specified in the partition_rec " - "argument"; break; - case -19: str = "Record oriented I/O not supported for this " - "partition"; break; - case -20: str = "Bad format of record to be written or " - "required keyword variable not " - "specified"; break; - default: str = "Unknown SAL status code"; break; - } - return str; -} - -void __init -ia64_sal_handler_init (void *entry_point, void *gpval) -{ - /* fill in the SAL procedure descriptor and point ia64_sal to it: */ - pdesc.addr = entry_point; - pdesc.gpval = gpval; - ia64_sal = (ia64_sal_handler) &pdesc; -} - -static void __init -check_versions (struct ia64_sal_systab *systab) -{ - sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor; - sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor; - - /* Check for broken firmware */ - if ((sal_revision == SAL_VERSION_CODE(49, 29)) - && (sal_version == SAL_VERSION_CODE(49, 29))) - { - /* - * Old firmware for zx2000 prototypes have this weird version number, - * reset it to something sane. - */ - sal_revision = SAL_VERSION_CODE(2, 8); - sal_version = SAL_VERSION_CODE(0, 0); - } -} - -static void __init -sal_desc_entry_point (void *p) -{ - struct ia64_sal_desc_entry_point *ep = p; - ia64_pal_handler_init(__va(ep->pal_proc)); - ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp)); -} - -#ifdef CONFIG_SMP -static void __init -set_smp_redirect (int flag) -{ -#ifndef CONFIG_HOTPLUG_CPU - if (no_int_routing) - smp_int_redirect &= ~flag; - else - smp_int_redirect |= flag; -#else - /* - * For CPU Hotplug we dont want to do any chipset supported - * interrupt redirection. 
The reason is this would require that - * All interrupts be stopped and hard bind the irq to a cpu. - * Later when the interrupt is fired we need to set the redir hint - * on again in the vector. This is cumbersome for something that the - * user mode irq balancer will solve anyways. - */ - no_int_routing=1; - smp_int_redirect &= ~flag; -#endif -} -#else -#define set_smp_redirect(flag) do { } while (0) -#endif - -static void __init -sal_desc_platform_feature (void *p) -{ - struct ia64_sal_desc_platform_feature *pf = p; - sal_platform_features = pf->feature_mask; - - printk(KERN_INFO "SAL Platform features:"); - if (!sal_platform_features) { - printk(" None\n"); - return; - } - - if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK) - printk(" BusLock"); - if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) { - printk(" IRQ_Redirection"); - set_smp_redirect(SMP_IRQ_REDIRECTION); - } - if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) { - printk(" IPI_Redirection"); - set_smp_redirect(SMP_IPI_REDIRECTION); - } - if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT) - printk(" ITC_Drift"); - printk("\n"); -} - -#ifdef CONFIG_SMP -static void __init -sal_desc_ap_wakeup (void *p) -{ - struct ia64_sal_desc_ap_wakeup *ap = p; - - switch (ap->mechanism) { - case IA64_SAL_AP_EXTERNAL_INT: - ap_wakeup_vector = ap->vector; - printk(KERN_INFO "SAL: AP wakeup using external interrupt " - "vector 0x%lx\n", ap_wakeup_vector); - break; - default: - printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n"); - break; - } -} - -static void __init -chk_nointroute_opt(void) -{ - char *cp; - - for (cp = boot_command_line; *cp; ) { - if (memcmp(cp, "nointroute", 10) == 0) { - no_int_routing = 1; - printk ("no_int_routing on\n"); - break; - } else { - while (*cp != ' ' && *cp) - ++cp; - while (*cp == ' ') - ++cp; - } - } -} - -#else -static void __init sal_desc_ap_wakeup(void *p) { } -#endif - -/* - * HP rx5670 firmware polls for 
interrupts during SAL_CACHE_FLUSH by reading - * cr.ivr, but it never writes cr.eoi. This leaves any interrupt marked as - * "in-service" and masks other interrupts of equal or lower priority. - * - * HP internal defect reports: F1859, F2775, F3031. - */ -static int sal_cache_flush_drops_interrupts; - -static int __init -force_pal_cache_flush(char *str) -{ - sal_cache_flush_drops_interrupts = 1; - return 0; -} -early_param("force_pal_cache_flush", force_pal_cache_flush); - -void __init -check_sal_cache_flush (void) -{ - unsigned long flags; - int cpu; - u64 vector, cache_type = 3; - struct ia64_sal_retval isrv; - - if (sal_cache_flush_drops_interrupts) - return; - - cpu = get_cpu(); - local_irq_save(flags); - - /* - * Send ourselves a timer interrupt, wait until it's reported, and see - * if SAL_CACHE_FLUSH drops it. - */ - ia64_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0); - - while (!ia64_get_irr(IA64_TIMER_VECTOR)) - cpu_relax(); - - SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); - - if (isrv.status) - printk(KERN_ERR "SAL_CAL_FLUSH failed with %ld\n", isrv.status); - - if (ia64_get_irr(IA64_TIMER_VECTOR)) { - vector = ia64_get_ivr(); - ia64_eoi(); - WARN_ON(vector != IA64_TIMER_VECTOR); - } else { - sal_cache_flush_drops_interrupts = 1; - printk(KERN_ERR "SAL: SAL_CACHE_FLUSH drops interrupts; " - "PAL_CACHE_FLUSH will be used instead\n"); - ia64_eoi(); - } - - local_irq_restore(flags); - put_cpu(); -} - -s64 -ia64_sal_cache_flush (u64 cache_type) -{ - struct ia64_sal_retval isrv; - - if (sal_cache_flush_drops_interrupts) { - unsigned long flags; - u64 progress; - s64 rc; - - progress = 0; - local_irq_save(flags); - rc = ia64_pal_cache_flush(cache_type, - PAL_CACHE_FLUSH_INVALIDATE, &progress, NULL); - local_irq_restore(flags); - return rc; - } - - SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); - return isrv.status; -} -EXPORT_SYMBOL_GPL(ia64_sal_cache_flush); - -void __init -ia64_sal_init (struct ia64_sal_systab *systab) 
-{ - char *p; - int i; - - if (!systab) { - printk(KERN_WARNING "Hmm, no SAL System Table.\n"); - return; - } - - if (strncmp(systab->signature, "SST_", 4) != 0) - printk(KERN_ERR "bad signature in system table!"); - - check_versions(systab); -#ifdef CONFIG_SMP - chk_nointroute_opt(); -#endif - - /* revisions are coded in BCD, so %x does the job for us */ - printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n", - SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision), - systab->oem_id, systab->product_id, - systab->product_id[0] ? " " : "", - SAL_MAJOR(sal_version), SAL_MINOR(sal_version)); - - p = (char *) (systab + 1); - for (i = 0; i < systab->entry_count; i++) { - /* - * The first byte of each entry type contains the type - * descriptor. - */ - switch (*p) { - case SAL_DESC_ENTRY_POINT: - sal_desc_entry_point(p); - break; - case SAL_DESC_PLATFORM_FEATURE: - sal_desc_platform_feature(p); - break; - case SAL_DESC_PTC: - ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p; - break; - case SAL_DESC_AP_WAKEUP: - sal_desc_ap_wakeup(p); - break; - } - p += SAL_DESC_SIZE(*p); - } - -} - -int -ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, - u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7) -{ - if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) - return -1; - SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - return 0; -} -EXPORT_SYMBOL(ia64_sal_oemcall); - -int -ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, - u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, - u64 arg7) -{ - if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) - return -1; - SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, - arg7); - return 0; -} -EXPORT_SYMBOL(ia64_sal_oemcall_nolock); - -int -ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc, - u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, - u64 arg6, u64 arg7) -{ - if (oemfunc < 
IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) - return -1; - SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, - arg7); - return 0; -} -EXPORT_SYMBOL(ia64_sal_oemcall_reentrant); - -long -ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second, - unsigned long *drift_info) -{ - struct ia64_sal_retval isrv; - - SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); - *ticks_per_second = isrv.v0; - *drift_info = isrv.v1; - return isrv.status; -} -EXPORT_SYMBOL_GPL(ia64_sal_freq_base); diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c deleted file mode 100644 index 03b632c56899..000000000000 --- a/arch/ia64/kernel/salinfo.c +++ /dev/null @@ -1,646 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * salinfo.c - * - * Creates entries in /proc/sal for various system features. - * - * Copyright (c) 2003, 2006 Silicon Graphics, Inc. All rights reserved. - * Copyright (c) 2003 Hewlett-Packard Co - * Bjorn Helgaas - * - * 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo - * code to create this file - * Oct 23 2003 kaos@sgi.com - * Replace IPI with set_cpus_allowed() to read a record from the required cpu. - * Redesign salinfo log processing to separate interrupt and user space - * contexts. - * Cache the record across multi-block reads from user space. - * Support > 64 cpus. - * Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module. - * - * Jan 28 2004 kaos@sgi.com - * Periodically check for outstanding MCA or INIT records. - * - * Dec 5 2004 kaos@sgi.com - * Standardize which records are cleared automatically. - * - * Aug 18 2005 kaos@sgi.com - * mca.c may not pass a buffer, a NULL buffer just indicates that a new - * record is available in SAL. - * Replace some NR_CPUS by cpus_online, for hotplug cpu. - * - * Jan 5 2006 kaos@sgi.com - * Handle hotplug cpus coming online. - * Handle hotplug cpus going offline while they still have outstanding records. 
- * Use the cpu_* macros consistently. - * Replace the counting semaphore with a mutex and a test if the cpumask is non-empty. - * Modify the locking to make the test for "work to do" an atomic operation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Jesse Barnes "); -MODULE_DESCRIPTION("/proc interface to IA-64 SAL features"); -MODULE_LICENSE("GPL"); - -typedef struct { - const char *name; /* name of the proc entry */ - unsigned long feature; /* feature bit */ - struct proc_dir_entry *entry; /* registered entry (removal) */ -} salinfo_entry_t; - -/* - * List {name,feature} pairs for every entry in /proc/sal/ - * that this module exports - */ -static const salinfo_entry_t salinfo_entries[]={ - { "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, }, - { "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, }, - { "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, }, - { "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, }, -}; - -#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries) - -static char *salinfo_log_name[] = { - "mca", - "init", - "cmc", - "cpe", -}; - -static struct proc_dir_entry *salinfo_proc_entries[ - ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */ - ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */ - (2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */ - 1]; /* /proc/sal */ - -/* Some records we get ourselves, some are accessed as saved data in buffers - * that are owned by mca.c. - */ -struct salinfo_data_saved { - u8* buffer; - u64 size; - u64 id; - int cpu; -}; - -/* State transitions. Actions are :- - * Write "read " to the data file. - * Write "clear " to the data file. - * Write "oemdata to the data file. - * Read from the data file. - * Close the data file. - * - * Start state is NO_DATA. - * - * NO_DATA - * write "read " -> NO_DATA or LOG_RECORD. - * write "clear " -> NO_DATA or LOG_RECORD. 
- * write "oemdata -> return -EINVAL. - * read data -> return EOF. - * close -> unchanged. Free record areas. - * - * LOG_RECORD - * write "read " -> NO_DATA or LOG_RECORD. - * write "clear " -> NO_DATA or LOG_RECORD. - * write "oemdata -> format the oem data, goto OEMDATA. - * read data -> return the INIT/MCA/CMC/CPE record. - * close -> unchanged. Keep record areas. - * - * OEMDATA - * write "read " -> NO_DATA or LOG_RECORD. - * write "clear " -> NO_DATA or LOG_RECORD. - * write "oemdata -> format the oem data, goto OEMDATA. - * read data -> return the formatted oemdata. - * close -> unchanged. Keep record areas. - * - * Closing the data file does not change the state. This allows shell scripts - * to manipulate salinfo data, each shell redirection opens the file, does one - * action then closes it again. The record areas are only freed at close when - * the state is NO_DATA. - */ -enum salinfo_state { - STATE_NO_DATA, - STATE_LOG_RECORD, - STATE_OEMDATA, -}; - -struct salinfo_data { - cpumask_t cpu_event; /* which cpus have outstanding events */ - wait_queue_head_t read_wait; - u8 *log_buffer; - u64 log_size; - u8 *oemdata; /* decoded oem data */ - u64 oemdata_size; - int open; /* single-open to prevent races */ - u8 type; - u8 saved_num; /* using a saved record? */ - enum salinfo_state state :8; /* processing state */ - u8 padding; - int cpu_check; /* next CPU to check */ - struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */ -}; - -static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)]; - -static DEFINE_SPINLOCK(data_lock); -static DEFINE_SPINLOCK(data_saved_lock); - -/** salinfo_platform_oemdata - optional callback to decode oemdata from an error - * record. - * @sect_header: pointer to the start of the section to decode. - * @oemdata: returns vmalloc area containing the decoded output. - * @oemdata_size: returns length of decoded output (strlen). 
- * - * Description: If user space asks for oem data to be decoded by the kernel - * and/or prom and the platform has set salinfo_platform_oemdata to the address - * of a platform specific routine then call that routine. salinfo_platform_oemdata - * vmalloc's and formats its output area, returning the address of the text - * and its strlen. Returns 0 for success, -ve for error. The callback is - * invoked on the cpu that generated the error record. - */ -int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size); - -struct salinfo_platform_oemdata_parms { - const u8 *efi_guid; - u8 **oemdata; - u64 *oemdata_size; -}; - -static long -salinfo_platform_oemdata_cpu(void *context) -{ - struct salinfo_platform_oemdata_parms *parms = context; - - return salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); -} - -static void -shift1_data_saved (struct salinfo_data *data, int shift) -{ - memcpy(data->data_saved+shift, data->data_saved+shift+1, - (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0])); - memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0, - sizeof(data->data_saved[0])); -} - -/* This routine is invoked in interrupt context. Note: mca.c enables - * interrupts before calling this code for CMC/CPE. MCA and INIT events are - * not irq safe, do not call any routines that use spinlocks, they may deadlock. - * MCA and INIT records are recorded, a timer event will look for any - * outstanding events and wake up the user space code. - * - * The buffer passed from mca.c points to the output from ia64_log_get. This is - * a persistent buffer but its contents can change between the interrupt and - * when user space processes the record. Save the record id to identify - * changes. If the buffer is NULL then just update the bitmap. 
- */ -void -salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) -{ - struct salinfo_data *data = salinfo_data + type; - struct salinfo_data_saved *data_saved; - unsigned long flags = 0; - int i; - int saved_size = ARRAY_SIZE(data->data_saved); - - BUG_ON(type >= ARRAY_SIZE(salinfo_log_name)); - - if (irqsafe) - spin_lock_irqsave(&data_saved_lock, flags); - if (buffer) { - for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { - if (!data_saved->buffer) - break; - } - if (i == saved_size) { - if (!data->saved_num) { - shift1_data_saved(data, 0); - data_saved = data->data_saved + saved_size - 1; - } else - data_saved = NULL; - } - if (data_saved) { - data_saved->cpu = smp_processor_id(); - data_saved->id = ((sal_log_record_header_t *)buffer)->id; - data_saved->size = size; - data_saved->buffer = buffer; - } - } - cpumask_set_cpu(smp_processor_id(), &data->cpu_event); - if (irqsafe) { - wake_up_interruptible(&data->read_wait); - spin_unlock_irqrestore(&data_saved_lock, flags); - } -} - -/* Check for outstanding MCA/INIT records every minute (arbitrary) */ -#define SALINFO_TIMER_DELAY (60*HZ) -static struct timer_list salinfo_timer; -extern void ia64_mlogbuf_dump(void); - -static void -salinfo_timeout_check(struct salinfo_data *data) -{ - if (!data->open) - return; - if (!cpumask_empty(&data->cpu_event)) - wake_up_interruptible(&data->read_wait); -} - -static void -salinfo_timeout(struct timer_list *unused) -{ - ia64_mlogbuf_dump(); - salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); - salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); - salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; - add_timer(&salinfo_timer); -} - -static int -salinfo_event_open(struct inode *inode, struct file *file) -{ - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - return 0; -} - -static ssize_t -salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) -{ - struct salinfo_data *data = 
pde_data(file_inode(file)); - char cmd[32]; - size_t size; - int i, n, cpu = -1; - -retry: - if (cpumask_empty(&data->cpu_event)) { - if (file->f_flags & O_NONBLOCK) - return -EAGAIN; - if (wait_event_interruptible(data->read_wait, - !cpumask_empty(&data->cpu_event))) - return -EINTR; - } - - n = data->cpu_check; - for (i = 0; i < nr_cpu_ids; i++) { - if (cpumask_test_cpu(n, &data->cpu_event)) { - if (!cpu_online(n)) { - cpumask_clear_cpu(n, &data->cpu_event); - continue; - } - cpu = n; - break; - } - if (++n == nr_cpu_ids) - n = 0; - } - - if (cpu == -1) - goto retry; - - ia64_mlogbuf_dump(); - - /* for next read, start checking at next CPU */ - data->cpu_check = cpu; - if (++data->cpu_check == nr_cpu_ids) - data->cpu_check = 0; - - snprintf(cmd, sizeof(cmd), "read %d\n", cpu); - - size = strlen(cmd); - if (size > count) - size = count; - if (copy_to_user(buffer, cmd, size)) - return -EFAULT; - - return size; -} - -static const struct proc_ops salinfo_event_proc_ops = { - .proc_open = salinfo_event_open, - .proc_read = salinfo_event_read, - .proc_lseek = noop_llseek, -}; - -static int -salinfo_log_open(struct inode *inode, struct file *file) -{ - struct salinfo_data *data = pde_data(inode); - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - spin_lock(&data_lock); - if (data->open) { - spin_unlock(&data_lock); - return -EBUSY; - } - data->open = 1; - spin_unlock(&data_lock); - - if (data->state == STATE_NO_DATA && - !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) { - data->open = 0; - return -ENOMEM; - } - - return 0; -} - -static int -salinfo_log_release(struct inode *inode, struct file *file) -{ - struct salinfo_data *data = pde_data(inode); - - if (data->state == STATE_NO_DATA) { - vfree(data->log_buffer); - vfree(data->oemdata); - data->log_buffer = NULL; - data->oemdata = NULL; - } - spin_lock(&data_lock); - data->open = 0; - spin_unlock(&data_lock); - return 0; -} - -static long -salinfo_log_read_cpu(void *context) -{ - struct 
salinfo_data *data = context; - sal_log_record_header_t *rh; - data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer); - rh = (sal_log_record_header_t *)(data->log_buffer); - /* Clear corrected errors as they are read from SAL */ - if (rh->severity == sal_log_severity_corrected) - ia64_sal_clear_state_info(data->type); - return 0; -} - -static void -salinfo_log_new_read(int cpu, struct salinfo_data *data) -{ - struct salinfo_data_saved *data_saved; - unsigned long flags; - int i; - int saved_size = ARRAY_SIZE(data->data_saved); - - data->saved_num = 0; - spin_lock_irqsave(&data_saved_lock, flags); -retry: - for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { - if (data_saved->buffer && data_saved->cpu == cpu) { - sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer); - data->log_size = data_saved->size; - memcpy(data->log_buffer, rh, data->log_size); - barrier(); /* id check must not be moved */ - if (rh->id == data_saved->id) { - data->saved_num = i+1; - break; - } - /* saved record changed by mca.c since interrupt, discard it */ - shift1_data_saved(data, i); - goto retry; - } - } - spin_unlock_irqrestore(&data_saved_lock, flags); - - if (!data->saved_num) - work_on_cpu_safe(cpu, salinfo_log_read_cpu, data); - if (!data->log_size) { - data->state = STATE_NO_DATA; - cpumask_clear_cpu(cpu, &data->cpu_event); - } else { - data->state = STATE_LOG_RECORD; - } -} - -static ssize_t -salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) -{ - struct salinfo_data *data = pde_data(file_inode(file)); - u8 *buf; - u64 bufsize; - - if (data->state == STATE_LOG_RECORD) { - buf = data->log_buffer; - bufsize = data->log_size; - } else if (data->state == STATE_OEMDATA) { - buf = data->oemdata; - bufsize = data->oemdata_size; - } else { - buf = NULL; - bufsize = 0; - } - return simple_read_from_buffer(buffer, count, ppos, buf, bufsize); -} - -static long 
-salinfo_log_clear_cpu(void *context) -{ - struct salinfo_data *data = context; - - ia64_sal_clear_state_info(data->type); - return 0; -} - -static int -salinfo_log_clear(struct salinfo_data *data, int cpu) -{ - sal_log_record_header_t *rh; - unsigned long flags; - spin_lock_irqsave(&data_saved_lock, flags); - data->state = STATE_NO_DATA; - if (!cpumask_test_cpu(cpu, &data->cpu_event)) { - spin_unlock_irqrestore(&data_saved_lock, flags); - return 0; - } - cpumask_clear_cpu(cpu, &data->cpu_event); - if (data->saved_num) { - shift1_data_saved(data, data->saved_num - 1); - data->saved_num = 0; - } - spin_unlock_irqrestore(&data_saved_lock, flags); - rh = (sal_log_record_header_t *)(data->log_buffer); - /* Corrected errors have already been cleared from SAL */ - if (rh->severity != sal_log_severity_corrected) - work_on_cpu_safe(cpu, salinfo_log_clear_cpu, data); - /* clearing a record may make a new record visible */ - salinfo_log_new_read(cpu, data); - if (data->state == STATE_LOG_RECORD) { - spin_lock_irqsave(&data_saved_lock, flags); - cpumask_set_cpu(cpu, &data->cpu_event); - wake_up_interruptible(&data->read_wait); - spin_unlock_irqrestore(&data_saved_lock, flags); - } - return 0; -} - -static ssize_t -salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) -{ - struct salinfo_data *data = pde_data(file_inode(file)); - char cmd[32]; - size_t size; - u32 offset; - int cpu; - - size = sizeof(cmd); - if (count < size) - size = count; - if (copy_from_user(cmd, buffer, size)) - return -EFAULT; - - if (sscanf(cmd, "read %d", &cpu) == 1) { - salinfo_log_new_read(cpu, data); - } else if (sscanf(cmd, "clear %d", &cpu) == 1) { - int ret; - if ((ret = salinfo_log_clear(data, cpu))) - count = ret; - } else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) { - if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA) - return -EINVAL; - if (offset > data->log_size - sizeof(efi_guid_t)) - return -EINVAL; - data->state = 
STATE_OEMDATA; - if (salinfo_platform_oemdata) { - struct salinfo_platform_oemdata_parms parms = { - .efi_guid = data->log_buffer + offset, - .oemdata = &data->oemdata, - .oemdata_size = &data->oemdata_size - }; - count = work_on_cpu_safe(cpu, salinfo_platform_oemdata_cpu, - &parms); - } else - data->oemdata_size = 0; - } else - return -EINVAL; - - return count; -} - -static const struct proc_ops salinfo_data_proc_ops = { - .proc_open = salinfo_log_open, - .proc_release = salinfo_log_release, - .proc_read = salinfo_log_read, - .proc_write = salinfo_log_write, - .proc_lseek = default_llseek, -}; - -static int salinfo_cpu_online(unsigned int cpu) -{ - unsigned int i, end = ARRAY_SIZE(salinfo_data); - struct salinfo_data *data; - - spin_lock_irq(&data_saved_lock); - for (i = 0, data = salinfo_data; i < end; ++i, ++data) { - cpumask_set_cpu(cpu, &data->cpu_event); - wake_up_interruptible(&data->read_wait); - } - spin_unlock_irq(&data_saved_lock); - return 0; -} - -static int salinfo_cpu_pre_down(unsigned int cpu) -{ - unsigned int i, end = ARRAY_SIZE(salinfo_data); - struct salinfo_data *data; - - spin_lock_irq(&data_saved_lock); - for (i = 0, data = salinfo_data; i < end; ++i, ++data) { - struct salinfo_data_saved *data_saved; - int j = ARRAY_SIZE(data->data_saved) - 1; - - for (data_saved = data->data_saved + j; j >= 0; - --j, --data_saved) { - if (data_saved->buffer && data_saved->cpu == cpu) - shift1_data_saved(data, j); - } - cpumask_clear_cpu(cpu, &data->cpu_event); - } - spin_unlock_irq(&data_saved_lock); - return 0; -} - -/* - * 'data' contains an integer that corresponds to the feature we're - * testing - */ -static int __maybe_unused proc_salinfo_show(struct seq_file *m, void *v) -{ - unsigned long data = (unsigned long)v; - seq_puts(m, (sal_platform_features & data) ? 
"1\n" : "0\n"); - return 0; -} - -static int __init -salinfo_init(void) -{ - struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */ - struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */ - struct proc_dir_entry *dir, *entry; - struct salinfo_data *data; - int i; - - salinfo_dir = proc_mkdir("sal", NULL); - if (!salinfo_dir) - return 0; - - for (i=0; i < NR_SALINFO_ENTRIES; i++) { - /* pass the feature bit in question as misc data */ - *sdir++ = proc_create_single_data(salinfo_entries[i].name, 0, - salinfo_dir, proc_salinfo_show, - (void *)salinfo_entries[i].feature); - } - - for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { - data = salinfo_data + i; - data->type = i; - init_waitqueue_head(&data->read_wait); - dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); - if (!dir) - continue; - - entry = proc_create_data("event", S_IRUSR, dir, - &salinfo_event_proc_ops, data); - if (!entry) - continue; - *sdir++ = entry; - - entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir, - &salinfo_data_proc_ops, data); - if (!entry) - continue; - *sdir++ = entry; - - *sdir++ = dir; - } - - *sdir++ = salinfo_dir; - - timer_setup(&salinfo_timer, salinfo_timeout, 0); - salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; - add_timer(&salinfo_timer); - - i = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/salinfo:online", - salinfo_cpu_online, salinfo_cpu_pre_down); - WARN_ON(i < 0); - return 0; -} - -module_init(salinfo_init); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c deleted file mode 100644 index 5a55ac82c13a..000000000000 --- a/arch/ia64/kernel/setup.c +++ /dev/null @@ -1,1081 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Architecture-specific setup. 
- * - * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 2000, 2004 Intel Corp - * Rohit Seth - * Suresh Siddha - * Gordon Jin - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * - * 12/26/04 S.Siddha, G.Jin, R.Seth - * Add multi-threading and multi-core detection - * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo(). - * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map - * 03/31/00 R.Seth cpu_initialized and current->processor fixes - * 02/04/00 D.Mosberger some more get_cpuinfo fixes... - * 02/01/00 R.Seth fixed get_cpuinfo for SMP - * 01/07/99 S.Eranian added the support for command line argument - * 06/24/99 W.Drummond added boot_cpu_data. - * 05/28/05 Z. Menyhart Dynamic stride size for "flush_icache_range()" - */ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) -# error "struct cpuinfo_ia64 too big!" 
-#endif - -char ia64_platform_name[64]; - -#ifdef CONFIG_SMP -unsigned long __per_cpu_offset[NR_CPUS]; -EXPORT_SYMBOL(__per_cpu_offset); -#endif - -DEFINE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info); -EXPORT_SYMBOL(ia64_cpu_info); -DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); -#ifdef CONFIG_SMP -EXPORT_SYMBOL(local_per_cpu_offset); -#endif -unsigned long ia64_cycles_per_usec; -struct ia64_boot_param *ia64_boot_param; -struct screen_info screen_info; -unsigned long vga_console_iobase; -unsigned long vga_console_membase; - -static struct resource data_resource = { - .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM -}; - -static struct resource code_resource = { - .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM -}; - -static struct resource bss_resource = { - .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM -}; - -unsigned long ia64_max_cacheline_size; - -unsigned long ia64_iobase; /* virtual address for I/O accesses */ -EXPORT_SYMBOL(ia64_iobase); -struct io_space io_space[MAX_IO_SPACES]; -EXPORT_SYMBOL(io_space); -unsigned int num_io_spaces; - -/* - * "flush_icache_range()" needs to know what processor dependent stride size to use - * when it makes i-cache(s) coherent with d-caches. - */ -#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */ -unsigned long ia64_i_cache_stride_shift = ~0; -/* - * "clflush_cache_range()" needs to know what processor dependent stride size to - * use when it flushes cache lines including both d-cache and i-cache. 
- */ -/* Safest way to go: 32 bytes by 32 bytes */ -#define CACHE_STRIDE_SHIFT 5 -unsigned long ia64_cache_stride_shift = ~0; - -/* - * We use a special marker for the end of memory and it uses the extra (+1) slot - */ -struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; -static int num_rsvd_regions __initdata; - - -/* - * Filter incoming memory segments based on the primitive map created from the boot - * parameters. Segments contained in the map are removed from the memory ranges. A - * caller-specified function is called with the memory ranges that remain after filtering. - * This routine does not assume the incoming segments are sorted. - */ -int __init -filter_rsvd_memory (u64 start, u64 end, void *arg) -{ - u64 range_start, range_end, prev_start; - void (*func)(unsigned long, unsigned long, int); - int i; - -#if IGNORE_PFN0 - if (start == PAGE_OFFSET) { - printk(KERN_WARNING "warning: skipping physical page 0\n"); - start += PAGE_SIZE; - if (start >= end) return 0; - } -#endif - /* - * lowest possible address(walker uses virtual) - */ - prev_start = PAGE_OFFSET; - func = arg; - - for (i = 0; i < num_rsvd_regions; ++i) { - range_start = max(start, prev_start); - range_end = min(end, rsvd_region[i].start); - - if (range_start < range_end) - call_pernode_memory(__pa(range_start), range_end - range_start, func); - - /* nothing more available in this segment */ - if (range_end == end) return 0; - - prev_start = rsvd_region[i].end; - } - /* end of memory marker allows full processing inside loop body */ - return 0; -} - -/* - * Similar to "filter_rsvd_memory()", but the reserved memory ranges - * are not filtered out. 
- */ -int __init -filter_memory(u64 start, u64 end, void *arg) -{ - void (*func)(unsigned long, unsigned long, int); - -#if IGNORE_PFN0 - if (start == PAGE_OFFSET) { - printk(KERN_WARNING "warning: skipping physical page 0\n"); - start += PAGE_SIZE; - if (start >= end) - return 0; - } -#endif - func = arg; - if (start < end) - call_pernode_memory(__pa(start), end - start, func); - return 0; -} - -static void __init -sort_regions (struct rsvd_region *rsvd_region, int max) -{ - int j; - - /* simple bubble sorting */ - while (max--) { - for (j = 0; j < max; ++j) { - if (rsvd_region[j].start > rsvd_region[j+1].start) { - swap(rsvd_region[j], rsvd_region[j + 1]); - } - } - } -} - -/* merge overlaps */ -static int __init -merge_regions (struct rsvd_region *rsvd_region, int max) -{ - int i; - for (i = 1; i < max; ++i) { - if (rsvd_region[i].start >= rsvd_region[i-1].end) - continue; - if (rsvd_region[i].end > rsvd_region[i-1].end) - rsvd_region[i-1].end = rsvd_region[i].end; - --max; - memmove(&rsvd_region[i], &rsvd_region[i+1], - (max - i) * sizeof(struct rsvd_region)); - } - return max; -} - -/* - * Request address space for all standard resources - */ -static int __init register_memory(void) -{ - code_resource.start = ia64_tpa(_text); - code_resource.end = ia64_tpa(_etext) - 1; - data_resource.start = ia64_tpa(_etext); - data_resource.end = ia64_tpa(_edata) - 1; - bss_resource.start = ia64_tpa(__bss_start); - bss_resource.end = ia64_tpa(_end) - 1; - efi_initialize_iomem_resources(&code_resource, &data_resource, - &bss_resource); - - return 0; -} - -__initcall(register_memory); - - -#ifdef CONFIG_KEXEC - -/* - * This function checks if the reserved crashkernel is allowed on the specific - * IA64 machine flavour. Machines without an IO TLB use swiotlb and require - * some memory below 4 GB (i.e. in 32 bit area), see the implementation of - * kernel/dma/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that - * in kdump case. 
See the comment in sba_init() in sba_iommu.c. - * - * So, the only machvec that really supports loading the kdump kernel - * over 4 GB is "uv". - */ -static int __init check_crashkernel_memory(unsigned long pbase, size_t size) -{ - if (is_uv_system()) - return 1; - else - return pbase < (1UL << 32); -} - -static void __init setup_crashkernel(unsigned long total, int *n) -{ - unsigned long long base = 0, size = 0; - int ret; - - ret = parse_crashkernel(boot_command_line, total, - &size, &base); - if (ret == 0 && size > 0) { - if (!base) { - sort_regions(rsvd_region, *n); - *n = merge_regions(rsvd_region, *n); - base = kdump_find_rsvd_region(size, - rsvd_region, *n); - } - - if (!check_crashkernel_memory(base, size)) { - pr_warn("crashkernel: There would be kdump memory " - "at %ld GB but this is unusable because it " - "must\nbe below 4 GB. Change the memory " - "configuration of the machine.\n", - (unsigned long)(base >> 30)); - return; - } - - if (base != ~0UL) { - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(size >> 20), - (unsigned long)(base >> 20), - (unsigned long)(total >> 20)); - rsvd_region[*n].start = - (unsigned long)__va(base); - rsvd_region[*n].end = - (unsigned long)__va(base + size); - (*n)++; - crashk_res.start = base; - crashk_res.end = base + size - 1; - } - } - efi_memmap_res.start = ia64_boot_param->efi_memmap; - efi_memmap_res.end = efi_memmap_res.start + - ia64_boot_param->efi_memmap_size; - boot_param_res.start = __pa(ia64_boot_param); - boot_param_res.end = boot_param_res.start + - sizeof(*ia64_boot_param); -} -#else -static inline void __init setup_crashkernel(unsigned long total, int *n) -{} -#endif - -#ifdef CONFIG_CRASH_DUMP -static int __init reserve_elfcorehdr(u64 *start, u64 *end) -{ - u64 length; - - /* We get the address using the kernel command line, - * but the size is extracted from the EFI tables. 
- * Both address and size are required for reservation - * to work properly. - */ - - if (!is_vmcore_usable()) - return -EINVAL; - - if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) { - vmcore_unusable(); - return -EINVAL; - } - - *start = (unsigned long)__va(elfcorehdr_addr); - *end = *start + length; - return 0; -} -#endif /* CONFIG_CRASH_DUMP */ - -/** - * reserve_memory - setup reserved memory areas - * - * Setup the reserved memory areas set aside for the boot parameters, - * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, - * see arch/ia64/include/asm/meminit.h if you need to define more. - */ -void __init -reserve_memory (void) -{ - int n = 0; - unsigned long total_memory; - - /* - * none of the entries in this table overlap - */ - rsvd_region[n].start = (unsigned long) ia64_boot_param; - rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param); - n++; - - rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap); - rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size; - n++; - - rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line); - rsvd_region[n].end = (rsvd_region[n].start - + strlen(__va(ia64_boot_param->command_line)) + 1); - n++; - - rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START); - rsvd_region[n].end = (unsigned long) ia64_imva(_end); - n++; - -#ifdef CONFIG_BLK_DEV_INITRD - if (ia64_boot_param->initrd_start) { - rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); - rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size; - n++; - } -#endif - -#ifdef CONFIG_CRASH_DUMP - if (reserve_elfcorehdr(&rsvd_region[n].start, - &rsvd_region[n].end) == 0) - n++; -#endif - - total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); - n++; - - setup_crashkernel(total_memory, &n); - - /* end of memory marker */ - rsvd_region[n].start = ~0UL; - rsvd_region[n].end = ~0UL; - n++; - 
- num_rsvd_regions = n; - BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n); - - sort_regions(rsvd_region, num_rsvd_regions); - num_rsvd_regions = merge_regions(rsvd_region, num_rsvd_regions); - - /* reserve all regions except the end of memory marker with memblock */ - for (n = 0; n < num_rsvd_regions - 1; n++) { - struct rsvd_region *region = &rsvd_region[n]; - phys_addr_t addr = __pa(region->start); - phys_addr_t size = region->end - region->start; - - memblock_reserve(addr, size); - } -} - -/** - * find_initrd - get initrd parameters from the boot parameter structure - * - * Grab the initrd start and end from the boot parameter struct given us by - * the boot loader. - */ -void __init -find_initrd (void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - if (ia64_boot_param->initrd_start) { - initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); - initrd_end = initrd_start+ia64_boot_param->initrd_size; - - printk(KERN_INFO "Initial ramdisk at: 0x%lx (%llu bytes)\n", - initrd_start, ia64_boot_param->initrd_size); - } -#endif -} - -static void __init -io_port_init (void) -{ - unsigned long phys_iobase; - - /* - * Set `iobase' based on the EFI memory map or, failing that, the - * value firmware left in ar.k0. - * - * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute - * the port's virtual address, so ia32_load_state() loads it with a - * user virtual address. But in ia64 mode, glibc uses the - * *physical* address in ar.k0 to mmap the appropriate area from - * /dev/mem, and the inX()/outX() interfaces use MMIO. In both - * cases, user-mode can only use the legacy 0-64K I/O port space. - * - * ar.k0 is not involved in kernel I/O port accesses, which can use - * any of the I/O port spaces and are done via MMIO using the - * virtual mmio_base from the appropriate io_space[]. 
- */ - phys_iobase = efi_get_iobase(); - if (!phys_iobase) { - phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); - printk(KERN_INFO "No I/O port range found in EFI memory map, " - "falling back to AR.KR0 (0x%lx)\n", phys_iobase); - } - ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); - ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); - - /* setup legacy IO port space */ - io_space[0].mmio_base = ia64_iobase; - io_space[0].sparse = 1; - num_io_spaces = 1; -} - -/** - * early_console_setup - setup debugging console - * - * Consoles started here require little enough setup that we can start using - * them very early in the boot process, either right after the machine - * vector initialization, or even before if the drivers can detect their hw. - * - * Returns non-zero if a console couldn't be setup. - */ -static inline int __init -early_console_setup (char *cmdline) -{ -#ifdef CONFIG_EFI_PCDP - if (!efi_setup_pcdp_console(cmdline)) - return 0; -#endif - return -1; -} - -static void __init -screen_info_setup(void) -{ - unsigned int orig_x, orig_y, num_cols, num_rows, font_height; - - memset(&screen_info, 0, sizeof(screen_info)); - - if (!ia64_boot_param->console_info.num_rows || - !ia64_boot_param->console_info.num_cols) { - printk(KERN_WARNING "invalid screen-info, guessing 80x25\n"); - orig_x = 0; - orig_y = 0; - num_cols = 80; - num_rows = 25; - font_height = 16; - } else { - orig_x = ia64_boot_param->console_info.orig_x; - orig_y = ia64_boot_param->console_info.orig_y; - num_cols = ia64_boot_param->console_info.num_cols; - num_rows = ia64_boot_param->console_info.num_rows; - font_height = 400 / num_rows; - } - - screen_info.orig_x = orig_x; - screen_info.orig_y = orig_y; - screen_info.orig_video_cols = num_cols; - screen_info.orig_video_lines = num_rows; - screen_info.orig_video_points = font_height; - screen_info.orig_video_mode = 3; /* XXX fake */ - screen_info.orig_video_isVGA = 1; /* XXX fake */ - screen_info.orig_video_ega_bx = 3; /* XXX fake */ -} - -static 
inline void -mark_bsp_online (void) -{ -#ifdef CONFIG_SMP - /* If we register an early console, allow CPU 0 to printk */ - set_cpu_online(smp_processor_id(), true); -#endif -} - -static __initdata int nomca; -static __init int setup_nomca(char *s) -{ - nomca = 1; - return 0; -} -early_param("nomca", setup_nomca); - -void __init -setup_arch (char **cmdline_p) -{ - unw_init(); - - ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); - - *cmdline_p = __va(ia64_boot_param->command_line); - strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); - - efi_init(); - io_port_init(); - - uv_probe_system_type(); - parse_early_param(); - - if (early_console_setup(*cmdline_p) == 0) - mark_bsp_online(); - - /* Initialize the ACPI boot-time table parser */ - acpi_table_init(); - early_acpi_boot_init(); -#ifdef CONFIG_ACPI_NUMA - acpi_numa_init(); - acpi_numa_fixup(); -#ifdef CONFIG_ACPI_HOTPLUG_CPU - prefill_possible_map(); -#endif - per_cpu_scan_finalize((cpumask_empty(&early_cpu_possible_map) ? - 32 : cpumask_weight(&early_cpu_possible_map)), - additional_cpus > 0 ? additional_cpus : 0); -#endif /* CONFIG_ACPI_NUMA */ - -#ifdef CONFIG_SMP - smp_build_cpu_map(); -#endif - find_memory(); - - /* process SAL system table: */ - ia64_sal_init(__va(sal_systab_phys)); - -#ifdef CONFIG_ITANIUM - ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist); -#else - { - unsigned long num_phys_stacked; - - if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96) - ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist); - } -#endif - -#ifdef CONFIG_SMP - cpu_physical_id(0) = hard_smp_processor_id(); -#endif - - cpu_init(); /* initialize the bootstrap CPU */ - mmu_context_init(); /* initialize context_id bitmap */ - -#ifdef CONFIG_VT - if (!conswitchp) { -# if defined(CONFIG_VGA_CONSOLE) - /* - * Non-legacy systems may route legacy VGA MMIO range to system - * memory. 
vga_con probes the MMIO hole, so memory looks like - * a VGA device to it. The EFI memory map can tell us if it's - * memory so we can avoid this problem. - */ - if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY) - conswitchp = &vga_con; -# endif - } -#endif - - /* enable IA-64 Machine Check Abort Handling unless disabled */ - if (!nomca) - ia64_mca_init(); - - /* - * Default to /dev/sda2. This assumes that the EFI partition - * is physical disk 1 partition 1 and the Linux root disk is - * physical disk 1 partition 2. - */ - ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2); - - if (is_uv_system()) - uv_setup(cmdline_p); -#ifdef CONFIG_SMP - else - init_smp_config(); -#endif - - screen_info_setup(); - paging_init(); - - clear_sched_clock_stable(); -} - -/* - * Display cpu info for all CPUs. - */ -static int -show_cpuinfo (struct seq_file *m, void *v) -{ -#ifdef CONFIG_SMP -# define lpj c->loops_per_jiffy -# define cpunum c->cpu -#else -# define lpj loops_per_jiffy -# define cpunum 0 -#endif - static struct { - unsigned long mask; - const char *feature_name; - } feature_bits[] = { - { 1UL << 0, "branchlong" }, - { 1UL << 1, "spontaneous deferral"}, - { 1UL << 2, "16-byte atomic ops" } - }; - char features[128], *cp, *sep; - struct cpuinfo_ia64 *c = v; - unsigned long mask; - unsigned long proc_freq; - int i, size; - - mask = c->features; - - /* build the feature string: */ - memcpy(features, "standard", 9); - cp = features; - size = sizeof(features); - sep = ""; - for (i = 0; i < ARRAY_SIZE(feature_bits) && size > 1; ++i) { - if (mask & feature_bits[i].mask) { - cp += snprintf(cp, size, "%s%s", sep, - feature_bits[i].feature_name), - sep = ", "; - mask &= ~feature_bits[i].mask; - size = sizeof(features) - (cp - features); - } - } - if (mask && size > 1) { - /* print unknown features as a hex value */ - snprintf(cp, size, "%s0x%lx", sep, mask); - } - - proc_freq = cpufreq_quick_get(cpunum); - if (!proc_freq) - proc_freq = c->proc_freq / 1000; - - seq_printf(m, - "processor : 
%d\n" - "vendor : %s\n" - "arch : IA-64\n" - "family : %u\n" - "model : %u\n" - "model name : %s\n" - "revision : %u\n" - "archrev : %u\n" - "features : %s\n" - "cpu number : %lu\n" - "cpu regs : %u\n" - "cpu MHz : %lu.%03lu\n" - "itc MHz : %lu.%06lu\n" - "BogoMIPS : %lu.%02lu\n", - cpunum, c->vendor, c->family, c->model, - c->model_name, c->revision, c->archrev, - features, c->ppn, c->number, - proc_freq / 1000, proc_freq % 1000, - c->itc_freq / 1000000, c->itc_freq % 1000000, - lpj*HZ/500000, (lpj*HZ/5000) % 100); -#ifdef CONFIG_SMP - seq_printf(m, "siblings : %u\n", - cpumask_weight(&cpu_core_map[cpunum])); - if (c->socket_id != -1) - seq_printf(m, "physical id: %u\n", c->socket_id); - if (c->threads_per_core > 1 || c->cores_per_socket > 1) - seq_printf(m, - "core id : %u\n" - "thread id : %u\n", - c->core_id, c->thread_id); -#endif - seq_printf(m,"\n"); - - return 0; -} - -static void * -c_start (struct seq_file *m, loff_t *pos) -{ -#ifdef CONFIG_SMP - while (*pos < nr_cpu_ids && !cpu_online(*pos)) - ++*pos; -#endif - return *pos < nr_cpu_ids ? 
cpu_data(*pos) : NULL; -} - -static void * -c_next (struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} - -static void -c_stop (struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo -}; - -#define MAX_BRANDS 8 -static char brandname[MAX_BRANDS][128]; - -static char * -get_model_name(__u8 family, __u8 model) -{ - static int overflow; - char brand[128]; - int i; - - memcpy(brand, "Unknown", 8); - if (ia64_pal_get_brand_info(brand)) { - if (family == 0x7) - memcpy(brand, "Merced", 7); - else if (family == 0x1f) switch (model) { - case 0: memcpy(brand, "McKinley", 9); break; - case 1: memcpy(brand, "Madison", 8); break; - case 2: memcpy(brand, "Madison up to 9M cache", 23); break; - } - } - for (i = 0; i < MAX_BRANDS; i++) - if (strcmp(brandname[i], brand) == 0) - return brandname[i]; - for (i = 0; i < MAX_BRANDS; i++) - if (brandname[i][0] == '\0') - return strcpy(brandname[i], brand); - if (overflow++ == 0) - printk(KERN_ERR - "%s: Table overflow. 
Some processor model information will be missing\n", - __func__); - return "Unknown"; -} - -static void -identify_cpu (struct cpuinfo_ia64 *c) -{ - union { - unsigned long bits[5]; - struct { - /* id 0 & 1: */ - char vendor[16]; - - /* id 2 */ - u64 ppn; /* processor serial number */ - - /* id 3: */ - unsigned number : 8; - unsigned revision : 8; - unsigned model : 8; - unsigned family : 8; - unsigned archrev : 8; - unsigned reserved : 24; - - /* id 4: */ - u64 features; - } field; - } cpuid; - pal_vm_info_1_u_t vm1; - pal_vm_info_2_u_t vm2; - pal_status_t status; - unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ - int i; - for (i = 0; i < 5; ++i) - cpuid.bits[i] = ia64_get_cpuid(i); - - memcpy(c->vendor, cpuid.field.vendor, 16); -#ifdef CONFIG_SMP - c->cpu = smp_processor_id(); - - /* below default values will be overwritten by identify_siblings() - * for Multi-Threading/Multi-Core capable CPUs - */ - c->threads_per_core = c->cores_per_socket = c->num_log = 1; - c->socket_id = -1; - - identify_siblings(c); - - if (c->threads_per_core > smp_num_siblings) - smp_num_siblings = c->threads_per_core; -#endif - c->ppn = cpuid.field.ppn; - c->number = cpuid.field.number; - c->revision = cpuid.field.revision; - c->model = cpuid.field.model; - c->family = cpuid.field.family; - c->archrev = cpuid.field.archrev; - c->features = cpuid.field.features; - c->model_name = get_model_name(c->family, c->model); - - status = ia64_pal_vm_summary(&vm1, &vm2); - if (status == PAL_STATUS_SUCCESS) { - impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; - phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; - } - c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); - c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); -} - -/* - * Do the following calculations: - * - * 1. the max. cache line size. - * 2. the minimum of the i-cache stride sizes for "flush_icache_range()". - * 3. the minimum of the cache stride sizes for "clflush_cache_range()". 
- */ -static void -get_cache_info(void) -{ - unsigned long line_size, max = 1; - unsigned long l, levels, unique_caches; - pal_cache_config_info_t cci; - long status; - - status = ia64_pal_cache_summary(&levels, &unique_caches); - if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", - __func__, status); - max = SMP_CACHE_BYTES; - /* Safest setup for "flush_icache_range()" */ - ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT; - /* Safest setup for "clflush_cache_range()" */ - ia64_cache_stride_shift = CACHE_STRIDE_SHIFT; - goto out; - } - - for (l = 0; l < levels; ++l) { - /* cache_type (data_or_unified)=2 */ - status = ia64_pal_cache_config_info(l, 2, &cci); - if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_config_info" - "(l=%lu, 2) failed (status=%ld)\n", - __func__, l, status); - max = SMP_CACHE_BYTES; - /* The safest setup for "flush_icache_range()" */ - cci.pcci_stride = I_CACHE_STRIDE_SHIFT; - /* The safest setup for "clflush_cache_range()" */ - ia64_cache_stride_shift = CACHE_STRIDE_SHIFT; - cci.pcci_unified = 1; - } else { - if (cci.pcci_stride < ia64_cache_stride_shift) - ia64_cache_stride_shift = cci.pcci_stride; - - line_size = 1 << cci.pcci_line_size; - if (line_size > max) - max = line_size; - } - - if (!cci.pcci_unified) { - /* cache_type (instruction)=1*/ - status = ia64_pal_cache_config_info(l, 1, &cci); - if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_config_info" - "(l=%lu, 1) failed (status=%ld)\n", - __func__, l, status); - /* The safest setup for flush_icache_range() */ - cci.pcci_stride = I_CACHE_STRIDE_SHIFT; - } - } - if (cci.pcci_stride < ia64_i_cache_stride_shift) - ia64_i_cache_stride_shift = cci.pcci_stride; - } - out: - if (max > ia64_max_cacheline_size) - ia64_max_cacheline_size = max; -} - -/* - * cpu_init() initializes state that is per-CPU. This function acts - * as a 'CPU state barrier', nothing should get across. 
- */ -void -cpu_init (void) -{ - extern void ia64_mmu_init(void *); - static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG; - unsigned long num_phys_stacked; - pal_vm_info_2_u_t vmi; - unsigned int max_ctx; - struct cpuinfo_ia64 *cpu_info; - void *cpu_data; - - cpu_data = per_cpu_init(); -#ifdef CONFIG_SMP - /* - * insert boot cpu into sibling and core mapes - * (must be done after per_cpu area is setup) - */ - if (smp_processor_id() == 0) { - cpumask_set_cpu(0, &per_cpu(cpu_sibling_map, 0)); - cpumask_set_cpu(0, &cpu_core_map[0]); - } else { - /* - * Set ar.k3 so that assembly code in MCA handler can compute - * physical addresses of per cpu variables with a simple: - * phys = ar.k3 + &per_cpu_var - * and the alt-dtlb-miss handler can set per-cpu mapping into - * the TLB when needed. head.S already did this for cpu0. - */ - ia64_set_kr(IA64_KR_PER_CPU_DATA, - ia64_tpa(cpu_data) - (long) __per_cpu_start); - } -#endif - - get_cache_info(); - - /* - * We can't pass "local_cpu_data" to identify_cpu() because we haven't called - * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it - * depends on the data returned by identify_cpu(). We break the dependency by - * accessing cpu_data() through the canonical per-CPU address. - */ - cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(ia64_cpu_info) - __per_cpu_start); - identify_cpu(cpu_info); - -#ifdef CONFIG_MCKINLEY - { -# define FEATURE_SET 16 - struct ia64_pal_retval iprv; - - if (cpu_info->family == 0x1f) { - PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0); - if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80)) - PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, - (iprv.v1 | 0x80), FEATURE_SET, 0); - } - } -#endif - - /* Clear the stack memory reserved for pt_regs: */ - memset(task_pt_regs(current), 0, sizeof(struct pt_regs)); - - ia64_set_kr(IA64_KR_FPU_OWNER, 0); - - /* - * Initialize the page-table base register to a global - * directory with all zeroes. 
This ensure that we can handle - * TLB-misses to user address-space even before we created the - * first user address-space. This may happen, e.g., due to - * aggressive use of lfetch.fault. - */ - ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); - - /* - * Initialize default control register to defer speculative faults except - * for those arising from TLB misses, which are not deferred. The - * kernel MUST NOT depend on a particular setting of these bits (in other words, - * the kernel must have recovery code for all speculative accesses). Turn on - * dcr.lc as per recommendation by the architecture team. Most IA-32 apps - * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll - * be fine). - */ - ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR - | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); - mmgrab(&init_mm); - current->active_mm = &init_mm; - BUG_ON(current->mm); - - ia64_mmu_init(ia64_imva(cpu_data)); - ia64_mca_cpu_init(ia64_imva(cpu_data)); - - /* Clear ITC to eliminate sched_clock() overflows in human time. 
*/ - ia64_set_itc(0); - - /* disable all local interrupt sources: */ - ia64_set_itv(1 << 16); - ia64_set_lrr0(1 << 16); - ia64_set_lrr1(1 << 16); - ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); - ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); - - /* clear TPR & XTP to enable all interrupt classes: */ - ia64_setreg(_IA64_REG_CR_TPR, 0); - - /* Clear any pending interrupts left by SAL/EFI */ - while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR) - ia64_eoi(); - -#ifdef CONFIG_SMP - normal_xtp(); -#endif - - /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ - if (ia64_pal_vm_summary(NULL, &vmi) == 0) { - max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; - setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL); - } else { - printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); - max_ctx = (1U << 15) - 1; /* use architected minimum */ - } - while (max_ctx < ia64_ctx.max_ctx) { - unsigned int old = ia64_ctx.max_ctx; - if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) - break; - } - - if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) { - printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical " - "stacked regs\n"); - num_phys_stacked = 96; - } - /* size of physical stacked register partition plus 8 bytes: */ - if (num_phys_stacked > max_num_phys_stacked) { - ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8); - max_num_phys_stacked = num_phys_stacked; - } -} - -void __init arch_cpu_finalize_init(void) -{ - ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, - (unsigned long) __end___mckinley_e9_bundles); -} - -static int __init run_dmi_scan(void) -{ - dmi_setup(); - return 0; -} -core_initcall(run_dmi_scan); diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h deleted file mode 100644 index 58a36ce6c26e..000000000000 --- a/arch/ia64/kernel/sigframe.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -struct sigscratch { - unsigned long 
scratch_unat; /* ar.unat for the general registers saved in pt */ - unsigned long ar_pfs; /* for syscalls, the user-level function-state */ - struct pt_regs pt; -}; - -struct sigframe { - /* - * Place signal handler args where user-level unwinder can find them easily. - * DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is - * user-level code that depends on their presence! - */ - unsigned long arg0; /* signum */ - unsigned long arg1; /* siginfo pointer */ - unsigned long arg2; /* sigcontext pointer */ - /* - * End of architected state. - */ - - void __user *handler; /* pointer to the plabel of the signal handler */ - struct siginfo info; - struct sigcontext sc; -}; - -extern void ia64_do_signal (struct sigscratch *, long); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c deleted file mode 100644 index 51cf6a7ec158..000000000000 --- a/arch/ia64/kernel/signal.c +++ /dev/null @@ -1,412 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Architecture-specific signal handling support. - * - * Copyright (C) 1999-2004 Hewlett-Packard Co - * David Mosberger-Tang - * - * Derived from i386 and Alpha versions. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "sigframe.h" - -#define DEBUG_SIG 0 -#define STACK_ALIGN 16 /* minimal alignment for stack pointer */ - -#if _NSIG_WORDS > 1 -# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t)) -# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t)) -#else -# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0]) -# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) -#endif - -static long -restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) -{ - unsigned long ip, flags, nat, um, cfm, rsc; - long err; - - /* Always make any pending restarted system calls return -EINTR */ - current->restart_block.fn = do_no_restart_syscall; - - /* restore scratch that always needs gets updated during signal delivery: */ - err = __get_user(flags, &sc->sc_flags); - err |= __get_user(nat, &sc->sc_nat); - err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ - err |= __get_user(cfm, &sc->sc_cfm); - err |= __get_user(um, &sc->sc_um); /* user mask */ - err |= __get_user(rsc, &sc->sc_ar_rsc); - err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat); - err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); - err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); - err |= __get_user(scr->pt.pr, &sc->sc_pr); /* predicates */ - err |= __get_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ - err |= __get_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ - err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8); /* r1 */ - err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8); /* r8-r11 */ - err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8); /* r12-r13 */ - err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */ - - scr->pt.cr_ifs = cfm | (1UL << 63); - scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */ - - /* establish new instruction 
pointer: */ - scr->pt.cr_iip = ip & ~0x3UL; - ia64_psr(&scr->pt)->ri = ip & 0x3; - scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM); - - scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat); - - if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) { - /* Restore most scratch-state only when not in syscall. */ - err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ - err |= __get_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ - err |= __get_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ - err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */ - err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8); /* r2-r3 */ - err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8); /* r16-r31 */ - } - - if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) { - struct ia64_psr *psr = ia64_psr(&scr->pt); - - err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); - psr->mfh = 0; /* drop signal handler's fph contents... */ - preempt_disable(); - if (psr->dfh) - ia64_drop_fpu(current); - else { - /* We already own the local fph, otherwise psr->dfh wouldn't be 0. */ - __ia64_load_fpu(current->thread.fph); - ia64_set_local_fpu_owner(current); - } - preempt_enable(); - } - return err; -} - -long -ia64_rt_sigreturn (struct sigscratch *scr) -{ - extern char ia64_strace_leave_kernel, ia64_leave_kernel; - struct sigcontext __user *sc; - sigset_t set; - long retval; - - sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc; - - /* - * When we return to the previously executing context, r8 and r10 have already - * been setup the way we want them. Indeed, if the signal wasn't delivered while - * in a system call, we must not touch r8 or r10 as otherwise user-level state - * could be corrupted. 
- */ - retval = (long) &ia64_leave_kernel; - if (test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SYSCALL_AUDIT)) - /* - * strace expects to be notified after sigreturn returns even though the - * context to which we return may not be in the middle of a syscall. - * Thus, the return-value that strace displays for sigreturn is - * meaningless. - */ - retval = (long) &ia64_strace_leave_kernel; - - if (!access_ok(sc, sizeof(*sc))) - goto give_sigsegv; - - if (GET_SIGSET(&set, &sc->sc_mask)) - goto give_sigsegv; - - set_current_blocked(&set); - - if (restore_sigcontext(sc, scr)) - goto give_sigsegv; - -#if DEBUG_SIG - printk("SIG return (%s:%d): sp=%lx ip=%lx\n", - current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip); -#endif - if (restore_altstack(&sc->sc_stack)) - goto give_sigsegv; - return retval; - - give_sigsegv: - force_sig(SIGSEGV); - return retval; -} - -/* - * This does just the minimum required setup of sigcontext. - * Specifically, it only installs data that is either not knowable at - * the user-level or that gets modified before execution in the - * trampoline starts. Everything else is done at the user-level. 
- */ -static long -setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr) -{ - unsigned long flags = 0, ifs, cfm, nat; - long err = 0; - - ifs = scr->pt.cr_ifs; - - if (on_sig_stack((unsigned long) sc)) - flags |= IA64_SC_FLAG_ONSTACK; - if ((ifs & (1UL << 63)) == 0) - /* if cr_ifs doesn't have the valid bit set, we got here through a syscall */ - flags |= IA64_SC_FLAG_IN_SYSCALL; - cfm = ifs & ((1UL << 38) - 1); - ia64_flush_fph(current); - if ((current->thread.flags & IA64_THREAD_FPH_VALID)) { - flags |= IA64_SC_FLAG_FPH_VALID; - err = __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16); - } - - nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat); - - err |= __put_user(flags, &sc->sc_flags); - err |= __put_user(nat, &sc->sc_nat); - err |= PUT_SIGSET(mask, &sc->sc_mask); - err |= __put_user(cfm, &sc->sc_cfm); - err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um); - err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc); - err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat); /* ar.unat */ - err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */ - err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); - err |= __put_user(scr->pt.pr, &sc->sc_pr); /* predicates */ - err |= __put_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ - err |= __put_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ - err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8); /* r1 */ - err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8); /* r8-r11 */ - err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8); /* r12-r13 */ - err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8); /* r15 */ - err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip); - - if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) { - /* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. 
*/ - err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ - err |= __put_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ - err |= __put_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ - err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */ - err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8); /* r2-r3 */ - err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8); /* r16-r31 */ - } - return err; -} - -/* - * Check whether the register-backing store is already on the signal stack. - */ -static inline int -rbs_on_sig_stack (unsigned long bsp) -{ - return (bsp - current->sas_ss_sp < current->sas_ss_size); -} - -static long -setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr) -{ - extern char __kernel_sigtramp[]; - unsigned long tramp_addr, new_rbs = 0, new_sp; - struct sigframe __user *frame; - long err; - - new_sp = scr->pt.r12; - tramp_addr = (unsigned long) __kernel_sigtramp; - if (ksig->ka.sa.sa_flags & SA_ONSTACK) { - int onstack = sas_ss_flags(new_sp); - - if (onstack == 0) { - new_sp = current->sas_ss_sp + current->sas_ss_size; - /* - * We need to check for the register stack being on the - * signal stack separately, because it's switched - * separately (memory stack is switched in the kernel, - * register stack is switched in the signal trampoline). - */ - if (!rbs_on_sig_stack(scr->pt.ar_bspstore)) - new_rbs = ALIGN(current->sas_ss_sp, - sizeof(long)); - } else if (onstack == SS_ONSTACK) { - unsigned long check_sp; - - /* - * If we are on the alternate signal stack and would - * overflow it, don't. Return an always-bogus address - * instead so we will die with SIGSEGV. 
- */ - check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN; - if (!likely(on_sig_stack(check_sp))) { - force_sigsegv(ksig->sig); - return 1; - } - } - } - frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN); - - if (!access_ok(frame, sizeof(*frame))) { - force_sigsegv(ksig->sig); - return 1; - } - - err = __put_user(ksig->sig, &frame->arg0); - err |= __put_user(&frame->info, &frame->arg1); - err |= __put_user(&frame->sc, &frame->arg2); - err |= __put_user(new_rbs, &frame->sc.sc_rbs_base); - err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */ - err |= __put_user(ksig->ka.sa.sa_handler, &frame->handler); - - err |= copy_siginfo_to_user(&frame->info, &ksig->info); - - err |= __save_altstack(&frame->sc.sc_stack, scr->pt.r12); - err |= setup_sigcontext(&frame->sc, set, scr); - - if (unlikely(err)) { - force_sigsegv(ksig->sig); - return 1; - } - - scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */ - scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */ - scr->pt.cr_iip = tramp_addr; - ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */ - ia64_psr(&scr->pt)->be = 0; /* force little-endian byte-order */ - /* - * Force the interruption function mask to zero. This has no effect when a - * system-call got interrupted by a signal (since, in that case, scr->pt_cr_ifs is - * ignored), but it has the desirable effect of making it possible to deliver a - * signal with an incomplete register frame (which happens when a mandatory RSE - * load faults). Furthermore, it has no negative effect on the getting the user's - * dirty partition preserved, because that's governed by scr->pt.loadrs. - */ - scr->pt.cr_ifs = (1UL << 63); - - /* - * Note: this affects only the NaT bits of the scratch regs (the ones saved in - * pt_regs), which is exactly what we want. 
- */ - scr->scratch_unat = 0; /* ensure NaT bits of r12 is clear */ - -#if DEBUG_SIG - printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n", - current->comm, current->pid, ksig->sig, scr->pt.r12, frame->sc.sc_ip, frame->handler); -#endif - return 0; -} - -static long -handle_signal (struct ksignal *ksig, struct sigscratch *scr) -{ - int ret = setup_frame(ksig, sigmask_to_save(), scr); - - if (!ret) - signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP)); - - return ret; -} - -/* - * Note that `init' is a special process: it doesn't get signals it doesn't want to - * handle. Thus you cannot kill init even with a SIGKILL even by mistake. - */ -void -ia64_do_signal (struct sigscratch *scr, long in_syscall) -{ - long restart = in_syscall; - long errno = scr->pt.r8; - struct ksignal ksig; - - /* - * This only loops in the rare cases of handle_signal() failing, in which case we - * need to push through a forced SIGSEGV. - */ - while (1) { - if (!get_signal(&ksig)) - break; - - /* - * get_signal() may have run a debugger (via notify_parent()) - * and the debugger may have modified the state (e.g., to arrange for an - * inferior call), thus it's important to check for restarting _after_ - * get_signal(). - */ - if ((long) scr->pt.r10 != -1) - /* - * A system calls has to be restarted only if one of the error codes - * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10 - * isn't -1 then r8 doesn't hold an error code and we don't need to - * restart the syscall, so we can clear the "restart" flag here. 
- */ - restart = 0; - - if (ksig.sig <= 0) - break; - - if (unlikely(restart)) { - switch (errno) { - case ERESTART_RESTARTBLOCK: - case ERESTARTNOHAND: - scr->pt.r8 = EINTR; - /* note: scr->pt.r10 is already -1 */ - break; - case ERESTARTSYS: - if ((ksig.ka.sa.sa_flags & SA_RESTART) == 0) { - scr->pt.r8 = EINTR; - /* note: scr->pt.r10 is already -1 */ - break; - } - fallthrough; - case ERESTARTNOINTR: - ia64_decrement_ip(&scr->pt); - restart = 0; /* don't restart twice if handle_signal() fails... */ - } - } - - /* - * Whee! Actually deliver the signal. If the delivery failed, we need to - * continue to iterate in this loop so we can deliver the SIGSEGV... - */ - if (handle_signal(&ksig, scr)) - return; - } - - /* Did we come from a system call? */ - if (restart) { - /* Restart the system call - no handlers present */ - if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR - || errno == ERESTART_RESTARTBLOCK) - { - /* - * Note: the syscall number is in r15 which is saved in - * pt_regs so all we need to do here is adjust ip so that - * the "break" instruction gets re-executed. - */ - ia64_decrement_ip(&scr->pt); - if (errno == ERESTART_RESTARTBLOCK) - scr->pt.r15 = __NR_restart_syscall; - } - } - - /* if there's no signal to deliver, we just put the saved sigmask - * back */ - restore_saved_sigmask(); -} diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c deleted file mode 100644 index ea4f009a232b..000000000000 --- a/arch/ia64/kernel/smp.c +++ /dev/null @@ -1,335 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * SMP Support - * - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang - * - * Lots of stuff stolen from arch/alpha/kernel/smp.c - * - * 01/05/16 Rohit Seth IA64-SMP functions. Reorganized - * the existing code (on the lines of x86 port). - * 00/09/11 David Mosberger Do loops_per_jiffy - * calibration on each CPU. 
- * 00/08/23 Asit Mallick fixed logical processor id - * 00/03/31 Rohit Seth Fixes for Bootstrap Processor - * & cpu_online_map now gets done here (instead of setup.c) - * 99/10/05 davidm Update to bring it in sync with new command-line processing - * scheme. - * 10/13/00 Goutham Rao Updated smp_call_function and - * smp_call_function_single to resend IPI on timeouts - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Note: alignment of 4 entries/cacheline was empirically determined - * to be a good tradeoff between hot cachelines & spreading the array - * across too many cacheline. - */ -static struct local_tlb_flush_counts { - unsigned int count; -} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; - -static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS], - shadow_flush_counts); - -#define IPI_CALL_FUNC 0 -#define IPI_CPU_STOP 1 -#define IPI_CALL_FUNC_SINGLE 2 -#define IPI_KDUMP_CPU_STOP 3 - -/* This needs to be cacheline aligned because it is written to by *other* CPUs. */ -static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation); - -extern void cpu_halt (void); - -static void -stop_this_cpu(void) -{ - /* - * Remove this CPU: - */ - set_cpu_online(smp_processor_id(), false); - max_xtp(); - local_irq_disable(); - cpu_halt(); -} - -void -cpu_die(void) -{ - max_xtp(); - local_irq_disable(); - cpu_halt(); - /* Should never be here */ - BUG(); - for (;;); -} - -irqreturn_t -handle_IPI (int irq, void *dev_id) -{ - int this_cpu = get_cpu(); - unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation); - unsigned long ops; - - mb(); /* Order interrupt and bit testing. */ - while ((ops = xchg(pending_ipis, 0)) != 0) { - mb(); /* Order bit clearing and data access. 
*/ - do { - unsigned long which; - - which = ffz(~ops); - ops &= ~(1 << which); - - switch (which) { - case IPI_CPU_STOP: - stop_this_cpu(); - break; - case IPI_CALL_FUNC: - generic_smp_call_function_interrupt(); - break; - case IPI_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; -#ifdef CONFIG_KEXEC - case IPI_KDUMP_CPU_STOP: - unw_init_running(kdump_cpu_freeze, NULL); - break; -#endif - default: - printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", - this_cpu, which); - break; - } - } while (ops); - mb(); /* Order data access and bit testing. */ - } - put_cpu(); - return IRQ_HANDLED; -} - - - -/* - * Called with preemption disabled. - */ -static inline void -send_IPI_single (int dest_cpu, int op) -{ - set_bit(op, &per_cpu(ipi_operation, dest_cpu)); - ia64_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0); -} - -/* - * Called with preemption disabled. - */ -static inline void -send_IPI_allbutself (int op) -{ - unsigned int i; - - for_each_online_cpu(i) { - if (i != smp_processor_id()) - send_IPI_single(i, op); - } -} - -/* - * Called with preemption disabled. - */ -static inline void -send_IPI_mask(const struct cpumask *mask, int op) -{ - unsigned int cpu; - - for_each_cpu(cpu, mask) { - send_IPI_single(cpu, op); - } -} - -/* - * Called with preemption disabled. - */ -static inline void -send_IPI_all (int op) -{ - int i; - - for_each_online_cpu(i) { - send_IPI_single(i, op); - } -} - -/* - * Called with preemption disabled. - */ -static inline void -send_IPI_self (int op) -{ - send_IPI_single(smp_processor_id(), op); -} - -#ifdef CONFIG_KEXEC -void -kdump_smp_send_stop(void) -{ - send_IPI_allbutself(IPI_KDUMP_CPU_STOP); -} - -void -kdump_smp_send_init(void) -{ - unsigned int cpu, self_cpu; - self_cpu = smp_processor_id(); - for_each_online_cpu(cpu) { - if (cpu != self_cpu) { - if(kdump_status[cpu] == 0) - ia64_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0); - } - } -} -#endif -/* - * Called with preemption disabled. 
- */ -void -arch_smp_send_reschedule (int cpu) -{ - ia64_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); -} -EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); - -/* - * Called with preemption disabled. - */ -static void -smp_send_local_flush_tlb (int cpu) -{ - ia64_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0); -} - -void -smp_local_flush_tlb(void) -{ - /* - * Use atomic ops. Otherwise, the load/increment/store sequence from - * a "++" operation can have the line stolen between the load & store. - * The overhead of the atomic op in negligible in this case & offers - * significant benefit for the brief periods where lots of cpus - * are simultaneously flushing TLBs. - */ - ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq); - local_flush_tlb_all(); -} - -#define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */ - -void -smp_flush_tlb_cpumask(cpumask_t xcpumask) -{ - unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts); - cpumask_t cpumask = xcpumask; - int mycpu, cpu, flush_mycpu = 0; - - preempt_disable(); - mycpu = smp_processor_id(); - - for_each_cpu(cpu, &cpumask) - counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff; - - mb(); - for_each_cpu(cpu, &cpumask) { - if (cpu == mycpu) - flush_mycpu = 1; - else - smp_send_local_flush_tlb(cpu); - } - - if (flush_mycpu) - smp_local_flush_tlb(); - - for_each_cpu(cpu, &cpumask) - while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff)) - udelay(FLUSH_DELAY); - - preempt_enable(); -} - -void -smp_flush_tlb_all (void) -{ - on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1); -} - -void -smp_flush_tlb_mm (struct mm_struct *mm) -{ - cpumask_var_t cpus; - preempt_disable(); - /* this happens for the common case of a single-threaded fork(): */ - if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1)) - { - local_finish_flush_tlb_mm(mm); - preempt_enable(); - return; - } - if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) 
{ - smp_call_function((void (*)(void *))local_finish_flush_tlb_mm, - mm, 1); - } else { - cpumask_copy(cpus, mm_cpumask(mm)); - smp_call_function_many(cpus, - (void (*)(void *))local_finish_flush_tlb_mm, mm, 1); - free_cpumask_var(cpus); - } - local_irq_disable(); - local_finish_flush_tlb_mm(mm); - local_irq_enable(); - preempt_enable(); -} - -void arch_send_call_function_single_ipi(int cpu) -{ - send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE); -} - -void arch_send_call_function_ipi_mask(const struct cpumask *mask) -{ - send_IPI_mask(mask, IPI_CALL_FUNC); -} - -/* - * this function calls the 'stop' function on all other CPUs in the system. - */ -void -smp_send_stop (void) -{ - send_IPI_allbutself(IPI_CPU_STOP); -} diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c deleted file mode 100644 index d0e935cf2093..000000000000 --- a/arch/ia64/kernel/smpboot.c +++ /dev/null @@ -1,839 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * SMP boot-related support - * - * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 2001, 2004-2005 Intel Corp - * Rohit Seth - * Suresh Siddha - * Gordon Jin - * Ashok Raj - * - * 01/05/16 Rohit Seth Moved SMP booting functions from smp.c to here. - * 01/04/27 David Mosberger Added ITC synching code. - * 02/07/31 David Mosberger Switch over to hotplug-CPU boot-sequence. - * smp_boot_cpus()/smp_commence() is replaced by - * smp_prepare_cpus()/__cpu_up()/smp_cpus_done(). 
- * 04/06/21 Ashok Raj Added CPU Hotplug Support - * 04/12/26 Jin Gordon - * 04/12/26 Rohit Seth - * Add multi-threading and multi-core detection - * 05/01/30 Suresh Siddha - * Setup cpu_sibling_map and cpu_core_map - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define SMP_DEBUG 0 - -#if SMP_DEBUG -#define Dprintk(x...) printk(x) -#else -#define Dprintk(x...) -#endif - -#ifdef CONFIG_HOTPLUG_CPU -#ifdef CONFIG_PERMIT_BSP_REMOVE -#define bsp_remove_ok 1 -#else -#define bsp_remove_ok 0 -#endif - -/* - * Global array allocated for NR_CPUS at boot time - */ -struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS]; - -/* - * start_ap in head.S uses this to store current booting cpu - * info. - */ -struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; - -#define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]); - -#else -#define set_brendez_area(x) -#endif - - -/* - * ITC synchronization related stuff: - */ -#define MASTER (0) -#define SLAVE (SMP_CACHE_BYTES/8) - -#define NUM_ROUNDS 64 /* magic value */ -#define NUM_ITERS 5 /* likewise */ - -static DEFINE_SPINLOCK(itc_sync_lock); -static volatile unsigned long go[SLAVE + 1]; - -#define DEBUG_ITC_SYNC 0 - -extern void start_ap (void); -extern unsigned long ia64_iobase; - -struct task_struct *task_for_booting_cpu; - -/* - * State for each CPU - */ -DEFINE_PER_CPU(int, cpu_state); - -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_core_map); -DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); -EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); - -int smp_num_siblings = 1; - -/* which logical CPU number maps to which CPU (physical APIC ID) */ -volatile int 
ia64_cpu_to_sapicid[NR_CPUS]; -EXPORT_SYMBOL(ia64_cpu_to_sapicid); - -static cpumask_t cpu_callin_map; - -struct smp_boot_data smp_boot_data __initdata; - -unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */ - -char __initdata no_int_routing; - -unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ - -#ifdef CONFIG_FORCE_CPEI_RETARGET -#define CPEI_OVERRIDE_DEFAULT (1) -#else -#define CPEI_OVERRIDE_DEFAULT (0) -#endif - -unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT; - -static int __init -cmdl_force_cpei(char *str) -{ - int value=0; - - get_option (&str, &value); - force_cpei_retarget = value; - - return 1; -} - -__setup("force_cpei=", cmdl_force_cpei); - -static int __init -nointroute (char *str) -{ - no_int_routing = 1; - printk ("no_int_routing on\n"); - return 1; -} - -__setup("nointroute", nointroute); - -static void fix_b0_for_bsp(void) -{ -#ifdef CONFIG_HOTPLUG_CPU - int cpuid; - static int fix_bsp_b0 = 1; - - cpuid = smp_processor_id(); - - /* - * Cache the b0 value on the first AP that comes up - */ - if (!(fix_bsp_b0 && cpuid)) - return; - - sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0]; - printk ("Fixed BSP b0 value from CPU %d\n", cpuid); - - fix_bsp_b0 = 0; -#endif -} - -void -sync_master (void *arg) -{ - unsigned long flags, i; - - go[MASTER] = 0; - - local_irq_save(flags); - { - for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { - while (!go[MASTER]) - cpu_relax(); - go[MASTER] = 0; - go[SLAVE] = ia64_get_itc(); - } - } - local_irq_restore(flags); -} - -/* - * Return the number of cycles by which our itc differs from the itc on the master - * (time-keeper) CPU. A positive number indicates our itc is ahead of the master, - * negative that it is behind. 
- */ -static inline long -get_delta (long *rt, long *master) -{ - unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; - unsigned long tcenter, t0, t1, tm; - long i; - - for (i = 0; i < NUM_ITERS; ++i) { - t0 = ia64_get_itc(); - go[MASTER] = 1; - while (!(tm = go[SLAVE])) - cpu_relax(); - go[SLAVE] = 0; - t1 = ia64_get_itc(); - - if (t1 - t0 < best_t1 - best_t0) - best_t0 = t0, best_t1 = t1, best_tm = tm; - } - - *rt = best_t1 - best_t0; - *master = best_tm - best_t0; - - /* average best_t0 and best_t1 without overflow: */ - tcenter = (best_t0/2 + best_t1/2); - if (best_t0 % 2 + best_t1 % 2 == 2) - ++tcenter; - return tcenter - best_tm; -} - -/* - * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU - * (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of - * unaccounted-for errors (such as getting a machine check in the middle of a calibration - * step). The basic idea is for the slave to ask the master what itc value it has and to - * read its own itc before and after the master responds. Each iteration gives us three - * timestamps: - * - * slave master - * - * t0 ---\ - * ---\ - * ---> - * tm - * /--- - * /--- - * t1 <--- - * - * - * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0 - * and t1. If we achieve this, the clocks are synchronized provided the interconnect - * between the slave and the master is symmetric. Even if the interconnect were - * asymmetric, we would still know that the synchronization error is smaller than the - * roundtrip latency (t0 - t1). - * - * When the interconnect is quiet and symmetric, this lets us synchronize the itc to - * within one or two cycles. However, we can only *guarantee* that the synchronization is - * accurate to within a round-trip time, which is typically in the range of several - * hundred cycles (e.g., ~500 cycles). 
In practice, this means that the itc's are usually - * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better - * than half a micro second or so. - */ -void -ia64_sync_itc (unsigned int master) -{ - long i, delta, adj, adjust_latency = 0, done = 0; - unsigned long flags, rt, master_time_stamp, bound; -#if DEBUG_ITC_SYNC - struct { - long rt; /* roundtrip time */ - long master; /* master's timestamp */ - long diff; /* difference between midpoint and master's timestamp */ - long lat; /* estimate of itc adjustment latency */ - } t[NUM_ROUNDS]; -#endif - - /* - * Make sure local timer ticks are disabled while we sync. If - * they were enabled, we'd have to worry about nasty issues - * like setting the ITC ahead of (or a long time before) the - * next scheduled tick. - */ - BUG_ON((ia64_get_itv() & (1 << 16)) == 0); - - go[MASTER] = 1; - - if (smp_call_function_single(master, sync_master, NULL, 0) < 0) { - printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master); - return; - } - - while (go[MASTER]) - cpu_relax(); /* wait for master to be ready */ - - spin_lock_irqsave(&itc_sync_lock, flags); - { - for (i = 0; i < NUM_ROUNDS; ++i) { - delta = get_delta(&rt, &master_time_stamp); - if (delta == 0) { - done = 1; /* let's lock on to this... 
*/ - bound = rt; - } - - if (!done) { - if (i > 0) { - adjust_latency += -delta; - adj = -delta + adjust_latency/4; - } else - adj = -delta; - - ia64_set_itc(ia64_get_itc() + adj); - } -#if DEBUG_ITC_SYNC - t[i].rt = rt; - t[i].master = master_time_stamp; - t[i].diff = delta; - t[i].lat = adjust_latency/4; -#endif - } - } - spin_unlock_irqrestore(&itc_sync_lock, flags); - -#if DEBUG_ITC_SYNC - for (i = 0; i < NUM_ROUNDS; ++i) - printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", - t[i].rt, t[i].master, t[i].diff, t[i].lat); -#endif - - printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, " - "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt); -} - -/* - * Ideally sets up per-cpu profiling hooks. Doesn't do much now... - */ -static inline void smp_setup_percpu_timer(void) -{ -} - -static void -smp_callin (void) -{ - int cpuid, phys_id, itc_master; - struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo; - extern void ia64_init_itm(void); - extern volatile int time_keeper_id; - - cpuid = smp_processor_id(); - phys_id = hard_smp_processor_id(); - itc_master = time_keeper_id; - - if (cpu_online(cpuid)) { - printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", - phys_id, cpuid); - BUG(); - } - - fix_b0_for_bsp(); - - /* - * numa_node_id() works after this. - */ - set_numa_node(cpu_to_node_map[cpuid]); - set_numa_mem(local_memory_node(cpu_to_node_map[cpuid])); - - spin_lock(&vector_lock); - /* Setup the per cpu irq handling data structures */ - __setup_vector_irq(cpuid); - notify_cpu_starting(cpuid); - set_cpu_online(cpuid, true); - per_cpu(cpu_state, cpuid) = CPU_ONLINE; - spin_unlock(&vector_lock); - - smp_setup_percpu_timer(); - - ia64_mca_cmc_vector_setup(); /* Setup vector on AP */ - - local_irq_enable(); - - if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { - /* - * Synchronize the ITC with the BP. 
Need to do this after irqs are - * enabled because ia64_sync_itc() calls smp_call_function_single(), which - * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls - * local_bh_enable(), which bugs out if irqs are not enabled... - */ - Dprintk("Going to syncup ITC with ITC Master.\n"); - ia64_sync_itc(itc_master); - } - - /* - * Get our bogomips. - */ - ia64_init_itm(); - - /* - * Delay calibration can be skipped if new processor is identical to the - * previous processor. - */ - last_cpuinfo = cpu_data(cpuid - 1); - this_cpuinfo = local_cpu_data; - if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq || - last_cpuinfo->proc_freq != this_cpuinfo->proc_freq || - last_cpuinfo->features != this_cpuinfo->features || - last_cpuinfo->revision != this_cpuinfo->revision || - last_cpuinfo->family != this_cpuinfo->family || - last_cpuinfo->archrev != this_cpuinfo->archrev || - last_cpuinfo->model != this_cpuinfo->model) - calibrate_delay(); - local_cpu_data->loops_per_jiffy = loops_per_jiffy; - - /* - * Allow the master to continue. - */ - cpumask_set_cpu(cpuid, &cpu_callin_map); - Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid); -} - - -/* - * Activate a secondary processor. head.S calls this. 
- */ -int -start_secondary (void *unused) -{ - /* Early console may use I/O ports */ - ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); -#ifndef CONFIG_PRINTK_TIME - Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); -#endif - efi_map_pal_code(); - cpu_init(); - smp_callin(); - - cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); - return 0; -} - -static int -do_boot_cpu (int sapicid, int cpu, struct task_struct *idle) -{ - int timeout; - - task_for_booting_cpu = idle; - Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); - - set_brendez_area(cpu); - ia64_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); - - /* - * Wait 10s total for the AP to start - */ - Dprintk("Waiting on callin_map ..."); - for (timeout = 0; timeout < 100000; timeout++) { - if (cpumask_test_cpu(cpu, &cpu_callin_map)) - break; /* It has booted */ - barrier(); /* Make sure we re-read cpu_callin_map */ - udelay(100); - } - Dprintk("\n"); - - if (!cpumask_test_cpu(cpu, &cpu_callin_map)) { - printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid); - ia64_cpu_to_sapicid[cpu] = -1; - set_cpu_online(cpu, false); /* was set in smp_callin() */ - return -EINVAL; - } - return 0; -} - -static int __init -decay (char *str) -{ - int ticks; - get_option (&str, &ticks); - return 1; -} - -__setup("decay=", decay); - -/* - * Initialize the logical CPU number to SAPICID mapping - */ -void __init -smp_build_cpu_map (void) -{ - int sapicid, cpu, i; - int boot_cpu_id = hard_smp_processor_id(); - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - ia64_cpu_to_sapicid[cpu] = -1; - } - - ia64_cpu_to_sapicid[0] = boot_cpu_id; - init_cpu_present(cpumask_of(0)); - set_cpu_possible(0, true); - for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) { - sapicid = smp_boot_data.cpu_phys_id[i]; - if (sapicid == boot_cpu_id) - continue; - set_cpu_present(cpu, true); - set_cpu_possible(cpu, true); - ia64_cpu_to_sapicid[cpu] = sapicid; - cpu++; - } -} - -/* - * Cycle through the 
APs sending Wakeup IPIs to boot each. - */ -void __init -smp_prepare_cpus (unsigned int max_cpus) -{ - int boot_cpu_id = hard_smp_processor_id(); - - /* - * Initialize the per-CPU profiling counter/multiplier - */ - - smp_setup_percpu_timer(); - - cpumask_set_cpu(0, &cpu_callin_map); - - local_cpu_data->loops_per_jiffy = loops_per_jiffy; - ia64_cpu_to_sapicid[0] = boot_cpu_id; - - printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id); - - current_thread_info()->cpu = 0; - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - printk(KERN_INFO "SMP mode deactivated.\n"); - init_cpu_online(cpumask_of(0)); - init_cpu_present(cpumask_of(0)); - init_cpu_possible(cpumask_of(0)); - return; - } -} - -void smp_prepare_boot_cpu(void) -{ - set_cpu_online(smp_processor_id(), true); - cpumask_set_cpu(smp_processor_id(), &cpu_callin_map); - set_numa_node(cpu_to_node_map[smp_processor_id()]); - per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; -} - -#ifdef CONFIG_HOTPLUG_CPU -static inline void -clear_cpu_sibling_map(int cpu) -{ - int i; - - for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu)) - cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i)); - for_each_cpu(i, &cpu_core_map[cpu]) - cpumask_clear_cpu(cpu, &cpu_core_map[i]); - - per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu] = CPU_MASK_NONE; -} - -static void -remove_siblinginfo(int cpu) -{ - if (cpu_data(cpu)->threads_per_core == 1 && - cpu_data(cpu)->cores_per_socket == 1) { - cpumask_clear_cpu(cpu, &cpu_core_map[cpu]); - cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, cpu)); - return; - } - - /* remove it from all sibling map's */ - clear_cpu_sibling_map(cpu); -} - -extern void fixup_irqs(void); - -int migrate_platform_irqs(unsigned int cpu) -{ - int new_cpei_cpu; - struct irq_data *data = NULL; - const struct cpumask *mask; - int retval = 0; - - /* - * dont permit CPEI target to removed. 
- */ - if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) { - printk ("CPU (%d) is CPEI Target\n", cpu); - if (can_cpei_retarget()) { - /* - * Now re-target the CPEI to a different processor - */ - new_cpei_cpu = cpumask_any(cpu_online_mask); - mask = cpumask_of(new_cpei_cpu); - set_cpei_target_cpu(new_cpei_cpu); - data = irq_get_irq_data(ia64_cpe_irq); - /* - * Switch for now, immediately, we need to do fake intr - * as other interrupts, but need to study CPEI behaviour with - * polling before making changes. - */ - if (data && data->chip) { - data->chip->irq_disable(data); - data->chip->irq_set_affinity(data, mask, false); - data->chip->irq_enable(data); - printk ("Re-targeting CPEI to cpu %d\n", new_cpei_cpu); - } - } - if (!data) { - printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); - retval = -EBUSY; - } - } - return retval; -} - -/* must be called with cpucontrol mutex held */ -int __cpu_disable(void) -{ - int cpu = smp_processor_id(); - - /* - * dont permit boot processor for now - */ - if (cpu == 0 && !bsp_remove_ok) { - printk ("Your platform does not support removal of BSP\n"); - return (-EBUSY); - } - - set_cpu_online(cpu, false); - - if (migrate_platform_irqs(cpu)) { - set_cpu_online(cpu, true); - return -EBUSY; - } - - remove_siblinginfo(cpu); - fixup_irqs(); - local_flush_tlb_all(); - cpumask_clear_cpu(cpu, &cpu_callin_map); - return 0; -} - -void __cpu_die(unsigned int cpu) -{ - unsigned int i; - - for (i = 0; i < 100; i++) { - /* They ack this in play_dead by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) - { - printk ("CPU %d is now offline\n", cpu); - return; - } - msleep(100); - } - printk(KERN_ERR "CPU %u didn't die...\n", cpu); -} -#endif /* CONFIG_HOTPLUG_CPU */ - -void -smp_cpus_done (unsigned int dummy) -{ - int cpu; - unsigned long bogosum = 0; - - /* - * Allow the user to impress friends. 
- */ - - for_each_online_cpu(cpu) { - bogosum += cpu_data(cpu)->loops_per_jiffy; - } - - printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); -} - -static inline void set_cpu_sibling_map(int cpu) -{ - int i; - - for_each_online_cpu(i) { - if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) { - cpumask_set_cpu(i, &cpu_core_map[cpu]); - cpumask_set_cpu(cpu, &cpu_core_map[i]); - if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) { - cpumask_set_cpu(i, - &per_cpu(cpu_sibling_map, cpu)); - cpumask_set_cpu(cpu, - &per_cpu(cpu_sibling_map, i)); - } - } - } -} - -int -__cpu_up(unsigned int cpu, struct task_struct *tidle) -{ - int ret; - int sapicid; - - sapicid = ia64_cpu_to_sapicid[cpu]; - if (sapicid == -1) - return -EINVAL; - - /* - * Already booted cpu? not valid anymore since we dont - * do idle loop tightspin anymore. - */ - if (cpumask_test_cpu(cpu, &cpu_callin_map)) - return -EINVAL; - - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - /* Processor goes to start_secondary(), sets online flag */ - ret = do_boot_cpu(sapicid, cpu, tidle); - if (ret < 0) - return ret; - - if (cpu_data(cpu)->threads_per_core == 1 && - cpu_data(cpu)->cores_per_socket == 1) { - cpumask_set_cpu(cpu, &per_cpu(cpu_sibling_map, cpu)); - cpumask_set_cpu(cpu, &cpu_core_map[cpu]); - return 0; - } - - set_cpu_sibling_map(cpu); - - return 0; -} - -/* - * Assume that CPUs have been discovered by some platform-dependent interface. For - * SoftSDV/Lion, that would be ACPI. - * - * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). - */ -void __init -init_smp_config(void) -{ - struct fptr { - unsigned long fp; - unsigned long gp; - } *ap_startup; - long sal_ret; - - /* Tell SAL where to drop the APs. 
*/ - ap_startup = (struct fptr *) start_ap; - sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, - ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0); - if (sal_ret < 0) - printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n", - ia64_sal_strerror(sal_ret)); -} - -/* - * identify_siblings(cpu) gets called from identify_cpu. This populates the - * information related to logical execution units in per_cpu_data structure. - */ -void identify_siblings(struct cpuinfo_ia64 *c) -{ - long status; - u16 pltid; - pal_logical_to_physical_t info; - - status = ia64_pal_logical_to_phys(-1, &info); - if (status != PAL_STATUS_SUCCESS) { - if (status != PAL_STATUS_UNIMPLEMENTED) { - printk(KERN_ERR - "ia64_pal_logical_to_phys failed with %ld\n", - status); - return; - } - - info.overview_ppid = 0; - info.overview_cpp = 1; - info.overview_tpc = 1; - } - - status = ia64_sal_physical_id_info(&pltid); - if (status != PAL_STATUS_SUCCESS) { - if (status != PAL_STATUS_UNIMPLEMENTED) - printk(KERN_ERR - "ia64_sal_pltid failed with %ld\n", - status); - return; - } - - c->socket_id = (pltid << 8) | info.overview_ppid; - - if (info.overview_cpp == 1 && info.overview_tpc == 1) - return; - - c->cores_per_socket = info.overview_cpp; - c->threads_per_core = info.overview_tpc; - c->num_log = info.overview_num_log; - - c->core_id = info.log1_cid; - c->thread_id = info.log1_tid; -} - -/* - * returns non zero, if multi-threading is enabled - * on at least one physical package. Due to hotplug cpu - * and (maxcpus=), all threads may not necessarily be enabled - * even though the processor supports multi-threading. 
- */ -int is_multithreading_enabled(void) -{ - int i, j; - - for_each_present_cpu(i) { - for_each_present_cpu(j) { - if (j == i) - continue; - if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) { - if (cpu_data(j)->core_id == cpu_data(i)->core_id) - return 1; - } - } - } - return 0; -} -EXPORT_SYMBOL_GPL(is_multithreading_enabled); diff --git a/arch/ia64/kernel/stacktrace.c b/arch/ia64/kernel/stacktrace.c deleted file mode 100644 index 6e583a6bd2f6..000000000000 --- a/arch/ia64/kernel/stacktrace.c +++ /dev/null @@ -1,40 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * arch/ia64/kernel/stacktrace.c - * - * Stack trace management functions - * - */ -#include -#include -#include - -static void -ia64_do_save_stack(struct unw_frame_info *info, void *arg) -{ - struct stack_trace *trace = arg; - unsigned long ip; - int skip = trace->skip; - - trace->nr_entries = 0; - do { - unw_get_ip(info, &ip); - if (ip == 0) - break; - if (skip == 0) { - trace->entries[trace->nr_entries++] = ip; - if (trace->nr_entries == trace->max_entries) - break; - } else - skip--; - } while (unw_unwind(info) >= 0); -} - -/* - * Save stack-backtrace addresses into a stack_trace buffer. - */ -void save_stack_trace(struct stack_trace *trace) -{ - unw_init_running(ia64_do_save_stack, trace); -} -EXPORT_SYMBOL(save_stack_trace); diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c deleted file mode 100644 index eb561cc93632..000000000000 --- a/arch/ia64/kernel/sys_ia64.c +++ /dev/null @@ -1,197 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This file contains various system calls that have different calling - * conventions on different platforms. - * - * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include /* doh, must come after sched.h... 
*/ -#include -#include -#include -#include - -#include -#include - -unsigned long -arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - long map_shared = (flags & MAP_SHARED); - unsigned long align_mask = 0; - struct mm_struct *mm = current->mm; - struct vm_unmapped_area_info info; - - if (len > RGN_MAP_LIMIT) - return -ENOMEM; - - /* handle fixed mapping: prevent overlap with huge pages */ - if (flags & MAP_FIXED) { - if (is_hugepage_only_range(mm, addr, len)) - return -EINVAL; - return addr; - } - -#ifdef CONFIG_HUGETLB_PAGE - if (REGION_NUMBER(addr) == RGN_HPAGE) - addr = 0; -#endif - if (!addr) - addr = TASK_UNMAPPED_BASE; - - if (map_shared && (TASK_SIZE > 0xfffffffful)) - /* - * For 64-bit tasks, align shared segments to 1MB to avoid potential - * performance penalty due to virtual aliasing (see ASDM). For 32-bit - * tasks, we prefer to avoid exhausting the address space too quickly by - * limiting alignment to a single page. - */ - align_mask = PAGE_MASK & (SHMLBA - 1); - - info.flags = 0; - info.length = len; - info.low_limit = addr; - info.high_limit = TASK_SIZE; - info.align_mask = align_mask; - info.align_offset = pgoff << PAGE_SHIFT; - return vm_unmapped_area(&info); -} - -asmlinkage long -ia64_getpriority (int which, int who) -{ - long prio; - - prio = sys_getpriority(which, who); - if (prio >= 0) { - force_successful_syscall_return(); - prio = 20 - prio; - } - return prio; -} - -/* XXX obsolete, but leave it here until the old libc is gone... */ -asmlinkage unsigned long -sys_getpagesize (void) -{ - return PAGE_SIZE; -} - -asmlinkage unsigned long -ia64_brk (unsigned long brk) -{ - unsigned long retval = sys_brk(brk); - force_successful_syscall_return(); - return retval; -} - -/* - * On IA-64, we return the two file descriptors in ret0 and ret1 (r8 - * and r9) as this is faster than doing a copy_to_user(). 
- */ -asmlinkage long -sys_ia64_pipe (void) -{ - struct pt_regs *regs = task_pt_regs(current); - int fd[2]; - int retval; - - retval = do_pipe_flags(fd, 0); - if (retval) - goto out; - retval = fd[0]; - regs->r9 = fd[1]; - out: - return retval; -} - -int ia64_mmap_check(unsigned long addr, unsigned long len, - unsigned long flags) -{ - unsigned long roff; - - /* - * Don't permit mappings into unmapped space, the virtual page table - * of a region, or across a region boundary. Note: RGN_MAP_LIMIT is - * equal to 2^n-PAGE_SIZE (for some integer n <= 61) and len > 0. - */ - roff = REGION_OFFSET(addr); - if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) - return -EINVAL; - return 0; -} - -/* - * mmap2() is like mmap() except that the offset is expressed in units - * of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces - * of) files that are larger than the address space of the CPU. - */ -asmlinkage unsigned long -sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) -{ - addr = ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); - if (!IS_ERR_VALUE(addr)) - force_successful_syscall_return(); - return addr; -} - -asmlinkage unsigned long -sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off) -{ - if (offset_in_page(off) != 0) - return -EINVAL; - - addr = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); - if (!IS_ERR_VALUE(addr)) - force_successful_syscall_return(); - return addr; -} - -asmlinkage unsigned long -ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, - unsigned long new_addr) -{ - addr = sys_mremap(addr, old_len, new_len, flags, new_addr); - if (!IS_ERR_VALUE(addr)) - force_successful_syscall_return(); - return addr; -} - -asmlinkage long -ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *tp) -{ - struct timespec64 rtn_tp; - s64 tick_ns; - - /* - * ia64's clock_gettime() 
syscall is implemented as a vdso call - * fsys_clock_gettime(). Currently it handles only - * CLOCK_REALTIME and CLOCK_MONOTONIC. Both are based on - * 'ar.itc' counter which gets incremented at a constant - * frequency. It's usually 400MHz, ~2.5x times slower than CPU - * clock frequency. Which is almost a 1ns hrtimer, but not quite. - * - * Let's special-case these timers to report correct precision - * based on ITC frequency and not HZ frequency for supported - * clocks. - */ - switch (which_clock) { - case CLOCK_REALTIME: - case CLOCK_MONOTONIC: - tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq); - rtn_tp = ns_to_timespec64(tick_ns); - return put_timespec64(&rtn_tp, tp); - } - - return sys_clock_getres(which_clock, tp); -} diff --git a/arch/ia64/kernel/syscalls/Makefile b/arch/ia64/kernel/syscalls/Makefile deleted file mode 100644 index d009f927a048..000000000000 --- a/arch/ia64/kernel/syscalls/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -kapi := arch/$(SRCARCH)/include/generated/asm -uapi := arch/$(SRCARCH)/include/generated/uapi/asm - -$(shell mkdir -p $(uapi) $(kapi)) - -syscall := $(src)/syscall.tbl -syshdr := $(srctree)/scripts/syscallhdr.sh -systbl := $(srctree)/scripts/syscalltbl.sh - -quiet_cmd_syshdr = SYSHDR $@ - cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --offset __NR_Linux $< $@ - -quiet_cmd_systbl = SYSTBL $@ - cmd_systbl = $(CONFIG_SHELL) $(systbl) $< $@ - -$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE - $(call if_changed,syshdr) - -$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE - $(call if_changed,systbl) - -uapisyshdr-y += unistd_64.h -kapisyshdr-y += syscall_table.h - -uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) -kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y)) -targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y)) - -PHONY += all -all: $(uapisyshdr-y) $(kapisyshdr-y) - @: diff --git a/arch/ia64/kernel/syscalls/syscall.tbl 
b/arch/ia64/kernel/syscalls/syscall.tbl deleted file mode 100644 index 83d8609aec03..000000000000 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ /dev/null @@ -1,375 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -# -# Linux system call numbers and entry vectors for ia64 -# -# The format is: -# -# -# Add 1024 to will get the actual system call number -# -# The is always "common" for this file -# -0 common ni_syscall sys_ni_syscall -1 common exit sys_exit -2 common read sys_read -3 common write sys_write -4 common open sys_open -5 common close sys_close -6 common creat sys_creat -7 common link sys_link -8 common unlink sys_unlink -9 common execve ia64_execve -10 common chdir sys_chdir -11 common fchdir sys_fchdir -12 common utimes sys_utimes -13 common mknod sys_mknod -14 common chmod sys_chmod -15 common chown sys_chown -16 common lseek sys_lseek -17 common getpid sys_getpid -18 common getppid sys_getppid -19 common mount sys_mount -20 common umount2 sys_umount -21 common setuid sys_setuid -22 common getuid sys_getuid -23 common geteuid sys_geteuid -24 common ptrace sys_ptrace -25 common access sys_access -26 common sync sys_sync -27 common fsync sys_fsync -28 common fdatasync sys_fdatasync -29 common kill sys_kill -30 common rename sys_rename -31 common mkdir sys_mkdir -32 common rmdir sys_rmdir -33 common dup sys_dup -34 common pipe sys_ia64_pipe -35 common times sys_times -36 common brk ia64_brk -37 common setgid sys_setgid -38 common getgid sys_getgid -39 common getegid sys_getegid -40 common acct sys_acct -41 common ioctl sys_ioctl -42 common fcntl sys_fcntl -43 common umask sys_umask -44 common chroot sys_chroot -45 common ustat sys_ustat -46 common dup2 sys_dup2 -47 common setreuid sys_setreuid -48 common setregid sys_setregid -49 common getresuid sys_getresuid -50 common setresuid sys_setresuid -51 common getresgid sys_getresgid -52 common setresgid sys_setresgid -53 common getgroups sys_getgroups -54 common setgroups sys_setgroups -55 
common getpgid sys_getpgid -56 common setpgid sys_setpgid -57 common setsid sys_setsid -58 common getsid sys_getsid -59 common sethostname sys_sethostname -60 common setrlimit sys_setrlimit -61 common getrlimit sys_getrlimit -62 common getrusage sys_getrusage -63 common gettimeofday sys_gettimeofday -64 common settimeofday sys_settimeofday -65 common select sys_select -66 common poll sys_poll -67 common symlink sys_symlink -68 common readlink sys_readlink -69 common uselib sys_uselib -70 common swapon sys_swapon -71 common swapoff sys_swapoff -72 common reboot sys_reboot -73 common truncate sys_truncate -74 common ftruncate sys_ftruncate -75 common fchmod sys_fchmod -76 common fchown sys_fchown -77 common getpriority ia64_getpriority -78 common setpriority sys_setpriority -79 common statfs sys_statfs -80 common fstatfs sys_fstatfs -81 common gettid sys_gettid -82 common semget sys_semget -83 common semop sys_semop -84 common semctl sys_semctl -85 common msgget sys_msgget -86 common msgsnd sys_msgsnd -87 common msgrcv sys_msgrcv -88 common msgctl sys_msgctl -89 common shmget sys_shmget -90 common shmat sys_shmat -91 common shmdt sys_shmdt -92 common shmctl sys_shmctl -93 common syslog sys_syslog -94 common setitimer sys_setitimer -95 common getitimer sys_getitimer -# 1120 was old_stat -# 1121 was old_lstat -# 1122 was old_fstat -99 common vhangup sys_vhangup -100 common lchown sys_lchown -101 common remap_file_pages sys_remap_file_pages -102 common wait4 sys_wait4 -103 common sysinfo sys_sysinfo -104 common clone sys_clone -105 common setdomainname sys_setdomainname -106 common uname sys_newuname -107 common adjtimex sys_adjtimex -# 1132 was create_module -109 common init_module sys_init_module -110 common delete_module sys_delete_module -# 1135 was get_kernel_syms -# 1136 was query_module -113 common quotactl sys_quotactl -114 common bdflush sys_ni_syscall -115 common sysfs sys_sysfs -116 common personality sys_personality -117 common afs_syscall sys_ni_syscall 
-118 common setfsuid sys_setfsuid -119 common setfsgid sys_setfsgid -120 common getdents sys_getdents -121 common flock sys_flock -122 common readv sys_readv -123 common writev sys_writev -124 common pread64 sys_pread64 -125 common pwrite64 sys_pwrite64 -126 common _sysctl sys_ni_syscall -127 common mmap sys_mmap -128 common munmap sys_munmap -129 common mlock sys_mlock -130 common mlockall sys_mlockall -131 common mprotect sys_mprotect -132 common mremap ia64_mremap -133 common msync sys_msync -134 common munlock sys_munlock -135 common munlockall sys_munlockall -136 common sched_getparam sys_sched_getparam -137 common sched_setparam sys_sched_setparam -138 common sched_getscheduler sys_sched_getscheduler -139 common sched_setscheduler sys_sched_setscheduler -140 common sched_yield sys_sched_yield -141 common sched_get_priority_max sys_sched_get_priority_max -142 common sched_get_priority_min sys_sched_get_priority_min -143 common sched_rr_get_interval sys_sched_rr_get_interval -144 common nanosleep sys_nanosleep -145 common nfsservctl sys_ni_syscall -146 common prctl sys_prctl -147 common old_getpagesize sys_getpagesize -148 common mmap2 sys_mmap2 -149 common pciconfig_read sys_pciconfig_read -150 common pciconfig_write sys_pciconfig_write -151 common perfmonctl sys_ni_syscall -152 common sigaltstack sys_sigaltstack -153 common rt_sigaction sys_rt_sigaction -154 common rt_sigpending sys_rt_sigpending -155 common rt_sigprocmask sys_rt_sigprocmask -156 common rt_sigqueueinfo sys_rt_sigqueueinfo -157 common rt_sigreturn sys_rt_sigreturn -158 common rt_sigsuspend sys_rt_sigsuspend -159 common rt_sigtimedwait sys_rt_sigtimedwait -160 common getcwd sys_getcwd -161 common capget sys_capget -162 common capset sys_capset -163 common sendfile sys_sendfile64 -164 common getpmsg sys_ni_syscall -165 common putpmsg sys_ni_syscall -166 common socket sys_socket -167 common bind sys_bind -168 common connect sys_connect -169 common listen sys_listen -170 common accept sys_accept 
-171 common getsockname sys_getsockname -172 common getpeername sys_getpeername -173 common socketpair sys_socketpair -174 common send sys_send -175 common sendto sys_sendto -176 common recv sys_recv -177 common recvfrom sys_recvfrom -178 common shutdown sys_shutdown -179 common setsockopt sys_setsockopt -180 common getsockopt sys_getsockopt -181 common sendmsg sys_sendmsg -182 common recvmsg sys_recvmsg -183 common pivot_root sys_pivot_root -184 common mincore sys_mincore -185 common madvise sys_madvise -186 common stat sys_newstat -187 common lstat sys_newlstat -188 common fstat sys_newfstat -189 common clone2 sys_clone2 -190 common getdents64 sys_getdents64 -191 common getunwind sys_getunwind -192 common readahead sys_readahead -193 common setxattr sys_setxattr -194 common lsetxattr sys_lsetxattr -195 common fsetxattr sys_fsetxattr -196 common getxattr sys_getxattr -197 common lgetxattr sys_lgetxattr -198 common fgetxattr sys_fgetxattr -199 common listxattr sys_listxattr -200 common llistxattr sys_llistxattr -201 common flistxattr sys_flistxattr -202 common removexattr sys_removexattr -203 common lremovexattr sys_lremovexattr -204 common fremovexattr sys_fremovexattr -205 common tkill sys_tkill -206 common futex sys_futex -207 common sched_setaffinity sys_sched_setaffinity -208 common sched_getaffinity sys_sched_getaffinity -209 common set_tid_address sys_set_tid_address -210 common fadvise64 sys_fadvise64_64 -211 common tgkill sys_tgkill -212 common exit_group sys_exit_group -213 common lookup_dcookie sys_lookup_dcookie -214 common io_setup sys_io_setup -215 common io_destroy sys_io_destroy -216 common io_getevents sys_io_getevents -217 common io_submit sys_io_submit -218 common io_cancel sys_io_cancel -219 common epoll_create sys_epoll_create -220 common epoll_ctl sys_epoll_ctl -221 common epoll_wait sys_epoll_wait -222 common restart_syscall sys_restart_syscall -223 common semtimedop sys_semtimedop -224 common timer_create sys_timer_create -225 common 
timer_settime sys_timer_settime -226 common timer_gettime sys_timer_gettime -227 common timer_getoverrun sys_timer_getoverrun -228 common timer_delete sys_timer_delete -229 common clock_settime sys_clock_settime -230 common clock_gettime sys_clock_gettime -231 common clock_getres ia64_clock_getres -232 common clock_nanosleep sys_clock_nanosleep -233 common fstatfs64 sys_fstatfs64 -234 common statfs64 sys_statfs64 -235 common mbind sys_mbind -236 common get_mempolicy sys_get_mempolicy -237 common set_mempolicy sys_set_mempolicy -238 common mq_open sys_mq_open -239 common mq_unlink sys_mq_unlink -240 common mq_timedsend sys_mq_timedsend -241 common mq_timedreceive sys_mq_timedreceive -242 common mq_notify sys_mq_notify -243 common mq_getsetattr sys_mq_getsetattr -244 common kexec_load sys_kexec_load -245 common vserver sys_ni_syscall -246 common waitid sys_waitid -247 common add_key sys_add_key -248 common request_key sys_request_key -249 common keyctl sys_keyctl -250 common ioprio_set sys_ioprio_set -251 common ioprio_get sys_ioprio_get -252 common move_pages sys_move_pages -253 common inotify_init sys_inotify_init -254 common inotify_add_watch sys_inotify_add_watch -255 common inotify_rm_watch sys_inotify_rm_watch -256 common migrate_pages sys_migrate_pages -257 common openat sys_openat -258 common mkdirat sys_mkdirat -259 common mknodat sys_mknodat -260 common fchownat sys_fchownat -261 common futimesat sys_futimesat -262 common newfstatat sys_newfstatat -263 common unlinkat sys_unlinkat -264 common renameat sys_renameat -265 common linkat sys_linkat -266 common symlinkat sys_symlinkat -267 common readlinkat sys_readlinkat -268 common fchmodat sys_fchmodat -269 common faccessat sys_faccessat -270 common pselect6 sys_pselect6 -271 common ppoll sys_ppoll -272 common unshare sys_unshare -273 common splice sys_splice -274 common set_robust_list sys_set_robust_list -275 common get_robust_list sys_get_robust_list -276 common sync_file_range sys_sync_file_range -277 
common tee sys_tee -278 common vmsplice sys_vmsplice -279 common fallocate sys_fallocate -280 common getcpu sys_getcpu -281 common epoll_pwait sys_epoll_pwait -282 common utimensat sys_utimensat -283 common signalfd sys_signalfd -284 common timerfd sys_ni_syscall -285 common eventfd sys_eventfd -286 common timerfd_create sys_timerfd_create -287 common timerfd_settime sys_timerfd_settime -288 common timerfd_gettime sys_timerfd_gettime -289 common signalfd4 sys_signalfd4 -290 common eventfd2 sys_eventfd2 -291 common epoll_create1 sys_epoll_create1 -292 common dup3 sys_dup3 -293 common pipe2 sys_pipe2 -294 common inotify_init1 sys_inotify_init1 -295 common preadv sys_preadv -296 common pwritev sys_pwritev -297 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo -298 common recvmmsg sys_recvmmsg -299 common fanotify_init sys_fanotify_init -300 common fanotify_mark sys_fanotify_mark -301 common prlimit64 sys_prlimit64 -302 common name_to_handle_at sys_name_to_handle_at -303 common open_by_handle_at sys_open_by_handle_at -304 common clock_adjtime sys_clock_adjtime -305 common syncfs sys_syncfs -306 common setns sys_setns -307 common sendmmsg sys_sendmmsg -308 common process_vm_readv sys_process_vm_readv -309 common process_vm_writev sys_process_vm_writev -310 common accept4 sys_accept4 -311 common finit_module sys_finit_module -312 common sched_setattr sys_sched_setattr -313 common sched_getattr sys_sched_getattr -314 common renameat2 sys_renameat2 -315 common getrandom sys_getrandom -316 common memfd_create sys_memfd_create -317 common bpf sys_bpf -318 common execveat sys_execveat -319 common userfaultfd sys_userfaultfd -320 common membarrier sys_membarrier -321 common kcmp sys_kcmp -322 common mlock2 sys_mlock2 -323 common copy_file_range sys_copy_file_range -324 common preadv2 sys_preadv2 -325 common pwritev2 sys_pwritev2 -326 common statx sys_statx -327 common io_pgetevents sys_io_pgetevents -328 common perf_event_open sys_perf_event_open -329 common seccomp sys_seccomp 
-330 common pkey_mprotect sys_pkey_mprotect -331 common pkey_alloc sys_pkey_alloc -332 common pkey_free sys_pkey_free -333 common rseq sys_rseq -# 334 through 423 are reserved to sync up with other architectures -424 common pidfd_send_signal sys_pidfd_send_signal -425 common io_uring_setup sys_io_uring_setup -426 common io_uring_enter sys_io_uring_enter -427 common io_uring_register sys_io_uring_register -428 common open_tree sys_open_tree -429 common move_mount sys_move_mount -430 common fsopen sys_fsopen -431 common fsconfig sys_fsconfig -432 common fsmount sys_fsmount -433 common fspick sys_fspick -434 common pidfd_open sys_pidfd_open -# 435 reserved for clone3 -436 common close_range sys_close_range -437 common openat2 sys_openat2 -438 common pidfd_getfd sys_pidfd_getfd -439 common faccessat2 sys_faccessat2 -440 common process_madvise sys_process_madvise -441 common epoll_pwait2 sys_epoll_pwait2 -442 common mount_setattr sys_mount_setattr -443 common quotactl_fd sys_quotactl_fd -444 common landlock_create_ruleset sys_landlock_create_ruleset -445 common landlock_add_rule sys_landlock_add_rule -446 common landlock_restrict_self sys_landlock_restrict_self -# 447 reserved for memfd_secret -448 common process_mrelease sys_process_mrelease -449 common futex_waitv sys_futex_waitv -450 common set_mempolicy_home_node sys_set_mempolicy_home_node -451 common cachestat sys_cachestat -452 common fchmodat2 sys_fchmodat2 diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c deleted file mode 100644 index 83ef044b63ef..000000000000 --- a/arch/ia64/kernel/time.c +++ /dev/null @@ -1,463 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/arch/ia64/kernel/time.c - * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger - * Copyright (C) 1999 Don Dugger - * Copyright (C) 1999-2000 VA Linux Systems - * Copyright (C) 1999-2000 Walt Drummond - */ - -#include -#include -#include -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "fsyscall_gtod_data.h" -#include "irq.h" - -static u64 itc_get_cycles(struct clocksource *cs); - -struct fsyscall_gtod_data_t fsyscall_gtod_data; - -struct itc_jitter_data_t itc_jitter_data; - -volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ - -#ifdef CONFIG_IA64_DEBUG_IRQ - -unsigned long last_cli_ip; -EXPORT_SYMBOL(last_cli_ip); - -#endif - -static struct clocksource clocksource_itc = { - .name = "itc", - .rating = 350, - .read = itc_get_cycles, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; -static struct clocksource *itc_clocksource; - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - -#include - -extern u64 cycle_to_nsec(u64 cyc); - -void vtime_flush(struct task_struct *tsk) -{ - struct thread_info *ti = task_thread_info(tsk); - u64 delta; - - if (ti->utime) - account_user_time(tsk, cycle_to_nsec(ti->utime)); - - if (ti->gtime) - account_guest_time(tsk, cycle_to_nsec(ti->gtime)); - - if (ti->idle_time) - account_idle_time(cycle_to_nsec(ti->idle_time)); - - if (ti->stime) { - delta = cycle_to_nsec(ti->stime); - account_system_index_time(tsk, delta, CPUTIME_SYSTEM); - } - - if (ti->hardirq_time) { - delta = cycle_to_nsec(ti->hardirq_time); - account_system_index_time(tsk, delta, CPUTIME_IRQ); - } - - if (ti->softirq_time) { - delta = cycle_to_nsec(ti->softirq_time); - account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ); - } - - ti->utime = 0; - ti->gtime = 0; - ti->idle_time = 0; - ti->stime = 0; - ti->hardirq_time = 0; - ti->softirq_time = 0; -} - -/* - * Called from the context switch with interrupts disabled, to charge all - * accumulated times to the current process, and to prepare accounting on - * the next process. 
- */ -void arch_vtime_task_switch(struct task_struct *prev) -{ - struct thread_info *pi = task_thread_info(prev); - struct thread_info *ni = task_thread_info(current); - - ni->ac_stamp = pi->ac_stamp; - ni->ac_stime = ni->ac_utime = 0; -} - -/* - * Account time for a transition between system, hard irq or soft irq state. - * Note that this function is called with interrupts enabled. - */ -static __u64 vtime_delta(struct task_struct *tsk) -{ - struct thread_info *ti = task_thread_info(tsk); - __u64 now, delta_stime; - - WARN_ON_ONCE(!irqs_disabled()); - - now = ia64_get_itc(); - delta_stime = now - ti->ac_stamp; - ti->ac_stamp = now; - - return delta_stime; -} - -void vtime_account_kernel(struct task_struct *tsk) -{ - struct thread_info *ti = task_thread_info(tsk); - __u64 stime = vtime_delta(tsk); - - if (tsk->flags & PF_VCPU) - ti->gtime += stime; - else - ti->stime += stime; -} -EXPORT_SYMBOL_GPL(vtime_account_kernel); - -void vtime_account_idle(struct task_struct *tsk) -{ - struct thread_info *ti = task_thread_info(tsk); - - ti->idle_time += vtime_delta(tsk); -} - -void vtime_account_softirq(struct task_struct *tsk) -{ - struct thread_info *ti = task_thread_info(tsk); - - ti->softirq_time += vtime_delta(tsk); -} - -void vtime_account_hardirq(struct task_struct *tsk) -{ - struct thread_info *ti = task_thread_info(tsk); - - ti->hardirq_time += vtime_delta(tsk); -} - -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - -static irqreturn_t -timer_interrupt (int irq, void *dev_id) -{ - unsigned long new_itm; - - if (cpu_is_offline(smp_processor_id())) { - return IRQ_HANDLED; - } - - new_itm = local_cpu_data->itm_next; - - if (!time_after(ia64_get_itc(), new_itm)) - printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", - ia64_get_itc(), new_itm); - - while (1) { - new_itm += local_cpu_data->itm_delta; - - legacy_timer_tick(smp_processor_id() == time_keeper_id); - - local_cpu_data->itm_next = new_itm; - - if (time_after(new_itm, ia64_get_itc())) - break; 
- - /* - * Allow IPIs to interrupt the timer loop. - */ - local_irq_enable(); - local_irq_disable(); - } - - do { - /* - * If we're too close to the next clock tick for - * comfort, we increase the safety margin by - * intentionally dropping the next tick(s). We do NOT - * update itm.next because that would force us to call - * xtime_update() which in turn would let our clock run - * too fast (with the potentially devastating effect - * of losing monotony of time). - */ - while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) - new_itm += local_cpu_data->itm_delta; - ia64_set_itm(new_itm); - /* double check, in case we got hit by a (slow) PMI: */ - } while (time_after_eq(ia64_get_itc(), new_itm)); - return IRQ_HANDLED; -} - -/* - * Encapsulate access to the itm structure for SMP. - */ -void -ia64_cpu_local_tick (void) -{ - int cpu = smp_processor_id(); - unsigned long shift = 0, delta; - - /* arrange for the cycle counter to generate a timer interrupt: */ - ia64_set_itv(IA64_TIMER_VECTOR); - - delta = local_cpu_data->itm_delta; - /* - * Stagger the timer tick for each CPU so they don't occur all at (almost) the - * same time: - */ - if (cpu) { - unsigned long hi = 1UL << ia64_fls(cpu); - shift = (2*(cpu - hi) + 1) * delta/hi/2; - } - local_cpu_data->itm_next = ia64_get_itc() + delta + shift; - ia64_set_itm(local_cpu_data->itm_next); -} - -static int nojitter; - -static int __init nojitter_setup(char *str) -{ - nojitter = 1; - printk("Jitter checking for ITC timers disabled\n"); - return 1; -} - -__setup("nojitter", nojitter_setup); - - -void ia64_init_itm(void) -{ - unsigned long platform_base_freq, itc_freq; - struct pal_freq_ratio itc_ratio, proc_ratio; - long status, platform_base_drift, itc_drift; - - /* - * According to SAL v2.6, we need to use a SAL call to determine the platform base - * frequency and then a PAL call to determine the frequency ratio between the ITC - * and the base frequency. 
- */ - status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, - &platform_base_freq, &platform_base_drift); - if (status != 0) { - printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); - } else { - status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio); - if (status != 0) - printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status); - } - if (status != 0) { - /* invent "random" values */ - printk(KERN_ERR - "SAL/PAL failed to obtain frequency info---inventing reasonable values\n"); - platform_base_freq = 100000000; - platform_base_drift = -1; /* no drift info */ - itc_ratio.num = 3; - itc_ratio.den = 1; - } - if (platform_base_freq < 40000000) { - printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n", - platform_base_freq); - platform_base_freq = 75000000; - platform_base_drift = -1; - } - if (!proc_ratio.den) - proc_ratio.den = 1; /* avoid division by zero */ - if (!itc_ratio.den) - itc_ratio.den = 1; /* avoid division by zero */ - - itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; - - local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; - printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, " - "ITC freq=%lu.%03luMHz", smp_processor_id(), - platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, - itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); - - if (platform_base_drift != -1) { - itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den; - printk("+/-%ldppm\n", itc_drift); - } else { - itc_drift = -1; - printk("\n"); - } - - local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den; - local_cpu_data->itc_freq = itc_freq; - local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC; - local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<itc_freq); - itc_clocksource = &clocksource_itc; - } -} - -static u64 itc_get_cycles(struct clocksource *cs) -{ - unsigned long lcycle, now, ret; - - if 
(!itc_jitter_data.itc_jitter) - return get_cycles(); - - lcycle = itc_jitter_data.itc_lastcycle; - now = get_cycles(); - if (lcycle && time_after(lcycle, now)) - return lcycle; - - /* - * Keep track of the last timer value returned. - * In an SMP environment, you could lose out in contention of - * cmpxchg. If so, your cmpxchg returns new value which the - * winner of contention updated to. Use the new value instead. - */ - ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, now); - if (unlikely(ret != lcycle)) - return ret; - - return now; -} - -void read_persistent_clock64(struct timespec64 *ts) -{ - efi_gettimeofday(ts); -} - -void __init -time_init (void) -{ - register_percpu_irq(IA64_TIMER_VECTOR, timer_interrupt, IRQF_IRQPOLL, - "timer"); - ia64_init_itm(); -} - -/* - * Generic udelay assumes that if preemption is allowed and the thread - * migrates to another CPU, that the ITC values are synchronized across - * all CPUs. - */ -static void -ia64_itc_udelay (unsigned long usecs) -{ - unsigned long start = ia64_get_itc(); - unsigned long end = start + usecs*local_cpu_data->cyc_per_usec; - - while (time_before(ia64_get_itc(), end)) - cpu_relax(); -} - -void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay; - -void -udelay (unsigned long usecs) -{ - (*ia64_udelay)(usecs); -} -EXPORT_SYMBOL(udelay); - -/* IA64 doesn't cache the timezone */ -void update_vsyscall_tz(void) -{ -} - -void update_vsyscall(struct timekeeper *tk) -{ - write_seqcount_begin(&fsyscall_gtod_data.seq); - - /* copy vsyscall data */ - fsyscall_gtod_data.clk_mask = tk->tkr_mono.mask; - fsyscall_gtod_data.clk_mult = tk->tkr_mono.mult; - fsyscall_gtod_data.clk_shift = tk->tkr_mono.shift; - fsyscall_gtod_data.clk_fsys_mmio = tk->tkr_mono.clock->archdata.fsys_mmio; - fsyscall_gtod_data.clk_cycle_last = tk->tkr_mono.cycle_last; - - fsyscall_gtod_data.wall_time.sec = tk->xtime_sec; - fsyscall_gtod_data.wall_time.snsec = tk->tkr_mono.xtime_nsec; - - fsyscall_gtod_data.monotonic_time.sec = 
tk->xtime_sec - + tk->wall_to_monotonic.tv_sec; - fsyscall_gtod_data.monotonic_time.snsec = tk->tkr_mono.xtime_nsec - + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->tkr_mono.shift); - - /* normalize */ - while (fsyscall_gtod_data.monotonic_time.snsec >= - (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { - fsyscall_gtod_data.monotonic_time.snsec -= - ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; - fsyscall_gtod_data.monotonic_time.sec++; - } - - write_seqcount_end(&fsyscall_gtod_data.seq); -} - diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c deleted file mode 100644 index 94a848b06f15..000000000000 --- a/arch/ia64/kernel/topology.c +++ /dev/null @@ -1,410 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * This file contains NUMA specific variables and functions which are used on - * NUMA machines with contiguous memory. - * 2002/08/07 Erich Focht - * Populate cpu entries in sysfs for non-numa systems as well - * Intel Corporation - Ashok Raj - * 02/27/2006 Zhang, Yanmin - * Populate cpu cache entries in sysfs for cpu cache info - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct ia64_cpu *sysfs_cpus; - -void arch_fix_phys_package_id(int num, u32 slot) -{ -#ifdef CONFIG_SMP - if (cpu_data(num)->socket_id == -1) - cpu_data(num)->socket_id = slot; -#endif -} -EXPORT_SYMBOL_GPL(arch_fix_phys_package_id); - - -#ifdef CONFIG_HOTPLUG_CPU -int __ref arch_register_cpu(int num) -{ - /* - * If CPEI can be re-targeted or if this is not - * CPEI target, then it is hotpluggable - */ - if (can_cpei_retarget() || !is_cpu_cpei_target(num)) - sysfs_cpus[num].cpu.hotpluggable = 1; - map_cpu_to_node(num, node_cpuid[num].nid); - return register_cpu(&sysfs_cpus[num].cpu, num); -} -EXPORT_SYMBOL(arch_register_cpu); - -void __ref 
arch_unregister_cpu(int num) -{ - unregister_cpu(&sysfs_cpus[num].cpu); - unmap_cpu_from_node(num, cpu_to_node(num)); -} -EXPORT_SYMBOL(arch_unregister_cpu); -#else -static int __init arch_register_cpu(int num) -{ - return register_cpu(&sysfs_cpus[num].cpu, num); -} -#endif /*CONFIG_HOTPLUG_CPU*/ - - -static int __init topology_init(void) -{ - int i, err = 0; - - sysfs_cpus = kcalloc(NR_CPUS, sizeof(struct ia64_cpu), GFP_KERNEL); - if (!sysfs_cpus) - panic("kzalloc in topology_init failed - NR_CPUS too big?"); - - for_each_present_cpu(i) { - if((err = arch_register_cpu(i))) - goto out; - } -out: - return err; -} - -subsys_initcall(topology_init); - - -/* - * Export cpu cache information through sysfs - */ - -/* - * A bunch of string array to get pretty printing - */ -static const char *cache_types[] = { - "", /* not used */ - "Instruction", - "Data", - "Unified" /* unified */ -}; - -static const char *cache_mattrib[]={ - "WriteThrough", - "WriteBack", - "", /* reserved */ - "" /* reserved */ -}; - -struct cache_info { - pal_cache_config_info_t cci; - cpumask_t shared_cpu_map; - int level; - int type; - struct kobject kobj; -}; - -struct cpu_cache_info { - struct cache_info *cache_leaves; - int num_cache_leaves; - struct kobject kobj; -}; - -static struct cpu_cache_info all_cpu_cache_info[NR_CPUS]; -#define LEAF_KOBJECT_PTR(x,y) (&all_cpu_cache_info[x].cache_leaves[y]) - -#ifdef CONFIG_SMP -static void cache_shared_cpu_map_setup(unsigned int cpu, - struct cache_info * this_leaf) -{ - pal_cache_shared_info_t csi; - int num_shared, i = 0; - unsigned int j; - - if (cpu_data(cpu)->threads_per_core <= 1 && - cpu_data(cpu)->cores_per_socket <= 1) { - cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); - return; - } - - if (ia64_pal_cache_shared_info(this_leaf->level, - this_leaf->type, - 0, - &csi) != PAL_STATUS_SUCCESS) - return; - - num_shared = (int) csi.num_shared; - do { - for_each_possible_cpu(j) - if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id - && 
cpu_data(j)->core_id == csi.log1_cid - && cpu_data(j)->thread_id == csi.log1_tid) - cpumask_set_cpu(j, &this_leaf->shared_cpu_map); - - i++; - } while (i < num_shared && - ia64_pal_cache_shared_info(this_leaf->level, - this_leaf->type, - i, - &csi) == PAL_STATUS_SUCCESS); -} -#else -static void cache_shared_cpu_map_setup(unsigned int cpu, - struct cache_info * this_leaf) -{ - cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); - return; -} -#endif - -static ssize_t show_coherency_line_size(struct cache_info *this_leaf, - char *buf) -{ - return sprintf(buf, "%u\n", 1 << this_leaf->cci.pcci_line_size); -} - -static ssize_t show_ways_of_associativity(struct cache_info *this_leaf, - char *buf) -{ - return sprintf(buf, "%u\n", this_leaf->cci.pcci_assoc); -} - -static ssize_t show_attributes(struct cache_info *this_leaf, char *buf) -{ - return sprintf(buf, - "%s\n", - cache_mattrib[this_leaf->cci.pcci_cache_attr]); -} - -static ssize_t show_size(struct cache_info *this_leaf, char *buf) -{ - return sprintf(buf, "%uK\n", this_leaf->cci.pcci_cache_size / 1024); -} - -static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf) -{ - unsigned number_of_sets = this_leaf->cci.pcci_cache_size; - number_of_sets /= this_leaf->cci.pcci_assoc; - number_of_sets /= 1 << this_leaf->cci.pcci_line_size; - - return sprintf(buf, "%u\n", number_of_sets); -} - -static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) -{ - cpumask_t shared_cpu_map; - - cpumask_and(&shared_cpu_map, - &this_leaf->shared_cpu_map, cpu_online_mask); - return scnprintf(buf, PAGE_SIZE, "%*pb\n", - cpumask_pr_args(&shared_cpu_map)); -} - -static ssize_t show_type(struct cache_info *this_leaf, char *buf) -{ - int type = this_leaf->type + this_leaf->cci.pcci_unified; - return sprintf(buf, "%s\n", cache_types[type]); -} - -static ssize_t show_level(struct cache_info *this_leaf, char *buf) -{ - return sprintf(buf, "%u\n", this_leaf->level); -} - -struct cache_attr { - struct attribute 
attr; - ssize_t (*show)(struct cache_info *, char *); - ssize_t (*store)(struct cache_info *, const char *, size_t count); -}; - -#ifdef define_one_ro - #undef define_one_ro -#endif -#define define_one_ro(_name) \ - static struct cache_attr _name = \ -__ATTR(_name, 0444, show_##_name, NULL) - -define_one_ro(level); -define_one_ro(type); -define_one_ro(coherency_line_size); -define_one_ro(ways_of_associativity); -define_one_ro(size); -define_one_ro(number_of_sets); -define_one_ro(shared_cpu_map); -define_one_ro(attributes); - -static struct attribute * cache_default_attrs[] = { - &type.attr, - &level.attr, - &coherency_line_size.attr, - &ways_of_associativity.attr, - &attributes.attr, - &size.attr, - &number_of_sets.attr, - &shared_cpu_map.attr, - NULL -}; -ATTRIBUTE_GROUPS(cache_default); - -#define to_object(k) container_of(k, struct cache_info, kobj) -#define to_attr(a) container_of(a, struct cache_attr, attr) - -static ssize_t ia64_cache_show(struct kobject * kobj, struct attribute * attr, char * buf) -{ - struct cache_attr *fattr = to_attr(attr); - struct cache_info *this_leaf = to_object(kobj); - ssize_t ret; - - ret = fattr->show ? 
fattr->show(this_leaf, buf) : 0; - return ret; -} - -static const struct sysfs_ops cache_sysfs_ops = { - .show = ia64_cache_show -}; - -static struct kobj_type cache_ktype = { - .sysfs_ops = &cache_sysfs_ops, - .default_groups = cache_default_groups, -}; - -static struct kobj_type cache_ktype_percpu_entry = { - .sysfs_ops = &cache_sysfs_ops, -}; - -static void cpu_cache_sysfs_exit(unsigned int cpu) -{ - kfree(all_cpu_cache_info[cpu].cache_leaves); - all_cpu_cache_info[cpu].cache_leaves = NULL; - all_cpu_cache_info[cpu].num_cache_leaves = 0; - memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); - return; -} - -static int cpu_cache_sysfs_init(unsigned int cpu) -{ - unsigned long i, levels, unique_caches; - pal_cache_config_info_t cci; - int j; - long status; - struct cache_info *this_cache; - int num_cache_leaves = 0; - - if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { - printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); - return -1; - } - - this_cache=kcalloc(unique_caches, sizeof(struct cache_info), - GFP_KERNEL); - if (this_cache == NULL) - return -ENOMEM; - - for (i=0; i < levels; i++) { - for (j=2; j >0 ; j--) { - if ((status=ia64_pal_cache_config_info(i,j, &cci)) != - PAL_STATUS_SUCCESS) - continue; - - this_cache[num_cache_leaves].cci = cci; - this_cache[num_cache_leaves].level = i + 1; - this_cache[num_cache_leaves].type = j; - - cache_shared_cpu_map_setup(cpu, - &this_cache[num_cache_leaves]); - num_cache_leaves ++; - } - } - - all_cpu_cache_info[cpu].cache_leaves = this_cache; - all_cpu_cache_info[cpu].num_cache_leaves = num_cache_leaves; - - memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); - - return 0; -} - -/* Add cache interface for CPU device */ -static int cache_add_dev(unsigned int cpu) -{ - struct device *sys_dev = get_cpu_device(cpu); - unsigned long i, j; - struct cache_info *this_object; - int retval = 0; - - if (all_cpu_cache_info[cpu].kobj.parent) - return 0; - - - retval = 
cpu_cache_sysfs_init(cpu); - if (unlikely(retval < 0)) - return retval; - - retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj, - &cache_ktype_percpu_entry, &sys_dev->kobj, - "%s", "cache"); - if (unlikely(retval < 0)) { - cpu_cache_sysfs_exit(cpu); - return retval; - } - - for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) { - this_object = LEAF_KOBJECT_PTR(cpu,i); - retval = kobject_init_and_add(&(this_object->kobj), - &cache_ktype, - &all_cpu_cache_info[cpu].kobj, - "index%1lu", i); - if (unlikely(retval)) { - for (j = 0; j < i; j++) { - kobject_put(&(LEAF_KOBJECT_PTR(cpu,j)->kobj)); - } - kobject_put(&all_cpu_cache_info[cpu].kobj); - cpu_cache_sysfs_exit(cpu); - return retval; - } - kobject_uevent(&(this_object->kobj), KOBJ_ADD); - } - kobject_uevent(&all_cpu_cache_info[cpu].kobj, KOBJ_ADD); - return retval; -} - -/* Remove cache interface for CPU device */ -static int cache_remove_dev(unsigned int cpu) -{ - unsigned long i; - - for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) - kobject_put(&(LEAF_KOBJECT_PTR(cpu,i)->kobj)); - - if (all_cpu_cache_info[cpu].kobj.parent) { - kobject_put(&all_cpu_cache_info[cpu].kobj); - memset(&all_cpu_cache_info[cpu].kobj, - 0, - sizeof(struct kobject)); - } - - cpu_cache_sysfs_exit(cpu); - - return 0; -} - -static int __init cache_sysfs_init(void) -{ - int ret; - - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/topology:online", - cache_add_dev, cache_remove_dev); - WARN_ON(ret < 0); - return 0; -} -device_initcall(cache_sysfs_init); diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c deleted file mode 100644 index 53735b1d1be3..000000000000 --- a/arch/ia64/kernel/traps.c +++ /dev/null @@ -1,612 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Architecture-specific trap handling. 
- * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * 05/12/00 grao : added isr in siginfo for SIGFPE - */ - -#include -#include -#include -#include -#include -#include /* For unblank_screen() */ -#include -#include -#include -#include -#include /* for ssleep() */ -#include -#include - -#include -#include -#include -#include -#include - -fpswa_interface_t *fpswa_interface; -EXPORT_SYMBOL(fpswa_interface); - -void __init -trap_init (void) -{ - if (ia64_boot_param->fpswa) - /* FPSWA fixup: make the interface pointer a kernel virtual address: */ - fpswa_interface = __va(ia64_boot_param->fpswa); -} - -int -die (const char *str, struct pt_regs *regs, long err) -{ - static struct { - spinlock_t lock; - u32 lock_owner; - int lock_owner_depth; - } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), - .lock_owner = -1, - .lock_owner_depth = 0 - }; - static int die_counter; - int cpu = get_cpu(); - - if (die.lock_owner != cpu) { - console_verbose(); - spin_lock_irq(&die.lock); - die.lock_owner = cpu; - die.lock_owner_depth = 0; - bust_spinlocks(1); - } - put_cpu(); - - if (++die.lock_owner_depth < 3) { - printk("%s[%d]: %s %ld [%d]\n", - current->comm, task_pid_nr(current), str, err, ++die_counter); - if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) - != NOTIFY_STOP) - show_regs(regs); - else - regs = NULL; - } else - printk(KERN_ERR "Recursive die() failure, output suppressed\n"); - - bust_spinlocks(0); - die.lock_owner = -1; - add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - spin_unlock_irq(&die.lock); - - if (!regs) - return 1; - - if (panic_on_oops) - panic("Fatal exception"); - - make_task_dead(SIGSEGV); - return 0; -} - -int -die_if_kernel (char *str, struct pt_regs *regs, long err) -{ - if (!user_mode(regs)) - return die(str, regs, err); - return 0; -} - -void -__kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) -{ - int sig, code; - - switch (break_num) { - case 0: /* unknown error (used by GCC for 
__builtin_abort()) */ - if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP) - == NOTIFY_STOP) - return; - if (die_if_kernel("bugcheck!", regs, break_num)) - return; - sig = SIGILL; code = ILL_ILLOPC; - break; - - case 1: /* integer divide by zero */ - sig = SIGFPE; code = FPE_INTDIV; - break; - - case 2: /* integer overflow */ - sig = SIGFPE; code = FPE_INTOVF; - break; - - case 3: /* range check/bounds check */ - sig = SIGFPE; code = FPE_FLTSUB; - break; - - case 4: /* null pointer dereference */ - sig = SIGSEGV; code = SEGV_MAPERR; - break; - - case 5: /* misaligned data */ - sig = SIGSEGV; code = BUS_ADRALN; - break; - - case 6: /* decimal overflow */ - sig = SIGFPE; code = __FPE_DECOVF; - break; - - case 7: /* decimal divide by zero */ - sig = SIGFPE; code = __FPE_DECDIV; - break; - - case 8: /* packed decimal error */ - sig = SIGFPE; code = __FPE_DECERR; - break; - - case 9: /* invalid ASCII digit */ - sig = SIGFPE; code = __FPE_INVASC; - break; - - case 10: /* invalid decimal digit */ - sig = SIGFPE; code = __FPE_INVDEC; - break; - - case 11: /* paragraph stack overflow */ - sig = SIGSEGV; code = __SEGV_PSTKOVF; - break; - - case 0x3f000 ... 0x3ffff: /* bundle-update in progress */ - sig = SIGILL; code = __ILL_BNDMOD; - break; - - default: - if ((break_num < 0x40000 || break_num > 0x100000) - && die_if_kernel("Bad break", regs, break_num)) - return; - - if (break_num < 0x80000) { - sig = SIGILL; code = __ILL_BREAK; - } else { - if (notify_die(DIE_BREAK, "bad break", regs, break_num, TRAP_BRKPT, SIGTRAP) - == NOTIFY_STOP) - return; - sig = SIGTRAP; code = TRAP_BRKPT; - } - } - force_sig_fault(sig, code, - (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), - break_num, 0 /* clear __ISR_VALID */, 0); -} - -/* - * disabled_fph_fault() is called when a user-level process attempts to access f32..f127 - * and it doesn't own the fp-high register partition. 
When this happens, we save the - * current fph partition in the task_struct of the fpu-owner (if necessary) and then load - * the fp-high partition of the current task (if necessary). Note that the kernel has - * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes - * care of clearing psr.dfh. - */ -static inline void -disabled_fph_fault (struct pt_regs *regs) -{ - struct ia64_psr *psr = ia64_psr(regs); - - /* first, grant user-level access to fph partition: */ - psr->dfh = 0; - - /* - * Make sure that no other task gets in on this processor - * while we're claiming the FPU - */ - preempt_disable(); -#ifndef CONFIG_SMP - { - struct task_struct *fpu_owner - = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER); - - if (ia64_is_local_fpu_owner(current)) { - preempt_enable_no_resched(); - return; - } - - if (fpu_owner) - ia64_flush_fph(fpu_owner); - } -#endif /* !CONFIG_SMP */ - ia64_set_local_fpu_owner(current); - if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) { - __ia64_load_fpu(current->thread.fph); - psr->mfh = 0; - } else { - __ia64_init_fpu(); - /* - * Set mfh because the state in thread.fph does not match the state in - * the fph partition. - */ - psr->mfh = 1; - } - preempt_enable_no_resched(); -} - -static inline int -fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs, - struct pt_regs *regs) -{ - fp_state_t fp_state; - fpswa_ret_t ret; - - if (!fpswa_interface) - return -1; - - memset(&fp_state, 0, sizeof(fp_state_t)); - - /* - * compute fp_state. only FP registers f6 - f11 are used by the - * kernel, so set those bits in the mask and set the low volatile - * pointer to point to these registers. 
- */ - fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ - - fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) ®s->f6; - /* - * unsigned long (*EFI_FPSWA) ( - * unsigned long trap_type, - * void *Bundle, - * unsigned long *pipsr, - * unsigned long *pfsr, - * unsigned long *pisr, - * unsigned long *ppreds, - * unsigned long *pifs, - * void *fp_state); - */ - ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle, - (unsigned long *) ipsr, (unsigned long *) fpsr, - (unsigned long *) isr, (unsigned long *) pr, - (unsigned long *) ifs, &fp_state); - - return ret.status; -} - -struct fpu_swa_msg { - unsigned long count; - unsigned long time; -}; -static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast); -DECLARE_PER_CPU(struct fpu_swa_msg, cpulast); -static struct fpu_swa_msg last __cacheline_aligned; - - -/* - * Handle floating-point assist faults and traps. - */ -static int -handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) -{ - long exception, bundle[2]; - unsigned long fault_ip; - - fault_ip = regs->cr_iip; - if (!fp_fault && (ia64_psr(regs)->ri == 0)) - fault_ip -= 16; - if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle))) - return -1; - - if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { - unsigned long count, current_jiffies = jiffies; - struct fpu_swa_msg *cp = this_cpu_ptr(&cpulast); - - if (unlikely(current_jiffies > cp->time)) - cp->count = 0; - if (unlikely(cp->count < 5)) { - cp->count++; - cp->time = current_jiffies + 5 * HZ; - - /* minimize races by grabbing a copy of count BEFORE checking last.time. */ - count = last.count; - barrier(); - - /* - * Lower 4 bits are used as a count. Upper bits are a sequence - * number that is updated when count is reset. The cmpxchg will - * fail is seqno has changed. This minimizes multiple cpus - * resetting the count. 
- */ - if (current_jiffies > last.time) - (void) cmpxchg_acq(&last.count, count, 16 + (count & ~15)); - - /* used fetchadd to atomically update the count */ - if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) { - last.time = current_jiffies + 5 * HZ; - printk(KERN_WARNING - "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", - current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr); - } - } - } - - exception = fp_emulate(fp_fault, bundle, ®s->cr_ipsr, ®s->ar_fpsr, &isr, ®s->pr, - ®s->cr_ifs, regs); - if (fp_fault) { - if (exception == 0) { - /* emulation was successful */ - ia64_increment_ip(regs); - } else if (exception == -1) { - printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); - return -1; - } else { - /* is next instruction a trap? */ - int si_code; - - if (exception & 2) { - ia64_increment_ip(regs); - } - si_code = FPE_FLTUNK; /* default code */ - if (isr & 0x11) { - si_code = FPE_FLTINV; - } else if (isr & 0x22) { - /* denormal operand gets the same si_code as underflow - * see arch/i386/kernel/traps.c:math_error() */ - si_code = FPE_FLTUND; - } else if (isr & 0x44) { - si_code = FPE_FLTDIV; - } - force_sig_fault(SIGFPE, si_code, - (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), - 0, __ISR_VALID, isr); - } - } else { - if (exception == -1) { - printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); - return -1; - } else if (exception != 0) { - /* raise exception */ - int si_code; - - si_code = FPE_FLTUNK; /* default code */ - if (isr & 0x880) { - si_code = FPE_FLTOVF; - } else if (isr & 0x1100) { - si_code = FPE_FLTUND; - } else if (isr & 0x2200) { - si_code = FPE_FLTRES; - } - force_sig_fault(SIGFPE, si_code, - (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), - 0, __ISR_VALID, isr); - } - } - return 0; -} - -struct illegal_op_return { - unsigned long fkt, arg1, arg2, arg3; -}; - -struct illegal_op_return -ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, 
long arg3, - long arg4, long arg5, long arg6, long arg7, - struct pt_regs regs) -{ - struct illegal_op_return rv; - char buf[128]; - -#ifdef CONFIG_IA64_BRL_EMU - { - extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long); - - rv = ia64_emulate_brl(®s, ec); - if (rv.fkt != (unsigned long) -1) - return rv; - } -#endif - - sprintf(buf, "IA-64 Illegal operation fault"); - rv.fkt = 0; - if (die_if_kernel(buf, ®s, 0)) - return rv; - - force_sig_fault(SIGILL, ILL_ILLOPC, - (void __user *) (regs.cr_iip + ia64_psr(®s)->ri), - 0, 0, 0); - return rv; -} - -void __kprobes -ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, - unsigned long iim, unsigned long itir, long arg5, long arg6, - long arg7, struct pt_regs regs) -{ - unsigned long code, error = isr, iip; - char buf[128]; - int result, sig, si_code; - static const char *reason[] = { - "IA-64 Illegal Operation fault", - "IA-64 Privileged Operation fault", - "IA-64 Privileged Register fault", - "IA-64 Reserved Register/Field fault", - "Disabled Instruction Set Transition fault", - "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", - "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", - "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" - }; - - if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { - /* - * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel - * the lfetch. - */ - ia64_psr(®s)->ed = 1; - return; - } - - iip = regs.cr_iip + ia64_psr(®s)->ri; - - switch (vector) { - case 24: /* General Exception */ - code = (isr >> 4) & 0xf; - sprintf(buf, "General Exception: %s%s", reason[code], - (code == 3) ? ((isr & (1UL << 37)) - ? 
" (RSE access)" : " (data access)") : ""); - if (code == 8) { -# ifdef CONFIG_IA64_PRINT_HAZARDS - printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", - current->comm, task_pid_nr(current), - regs.cr_iip + ia64_psr(®s)->ri, regs.pr); -# endif - return; - } - break; - - case 25: /* Disabled FP-Register */ - if (isr & 2) { - disabled_fph_fault(®s); - return; - } - sprintf(buf, "Disabled FPL fault---not supposed to happen!"); - break; - - case 26: /* NaT Consumption */ - if (user_mode(®s)) { - void __user *addr; - - if (((isr >> 4) & 0xf) == 2) { - /* NaT page consumption */ - sig = SIGSEGV; - code = SEGV_ACCERR; - addr = (void __user *) ifa; - } else { - /* register NaT consumption */ - sig = SIGILL; - code = ILL_ILLOPN; - addr = (void __user *) (regs.cr_iip - + ia64_psr(®s)->ri); - } - force_sig_fault(sig, code, addr, - vector, __ISR_VALID, isr); - return; - } else if (ia64_done_with_exception(®s)) - return; - sprintf(buf, "NaT consumption"); - break; - - case 31: /* Unsupported Data Reference */ - if (user_mode(®s)) { - force_sig_fault(SIGILL, ILL_ILLOPN, (void __user *) iip, - vector, __ISR_VALID, isr); - return; - } - sprintf(buf, "Unsupported data reference"); - break; - - case 29: /* Debug */ - case 35: /* Taken Branch Trap */ - case 36: /* Single Step Trap */ - if (fsys_mode(current, ®s)) { - extern char __kernel_syscall_via_break[]; - /* - * Got a trap in fsys-mode: Taken Branch Trap - * and Single Step trap need special handling; - * Debug trap is ignored (we disable it here - * and re-enable it in the lower-privilege trap). 
- */ - if (unlikely(vector == 29)) { - set_thread_flag(TIF_DB_DISABLED); - ia64_psr(®s)->db = 0; - ia64_psr(®s)->lp = 1; - return; - } - /* re-do the system call via break 0x100000: */ - regs.cr_iip = (unsigned long) __kernel_syscall_via_break; - ia64_psr(®s)->ri = 0; - ia64_psr(®s)->cpl = 3; - return; - } - switch (vector) { - default: - case 29: - si_code = TRAP_HWBKPT; -#ifdef CONFIG_ITANIUM - /* - * Erratum 10 (IFA may contain incorrect address) now has - * "NoFix" status. There are no plans for fixing this. - */ - if (ia64_psr(®s)->is == 0) - ifa = regs.cr_iip; -#endif - break; - case 35: si_code = TRAP_BRANCH; ifa = 0; break; - case 36: si_code = TRAP_TRACE; ifa = 0; break; - } - if (notify_die(DIE_FAULT, "ia64_fault", ®s, vector, si_code, SIGTRAP) - == NOTIFY_STOP) - return; - force_sig_fault(SIGTRAP, si_code, (void __user *) ifa, - 0, __ISR_VALID, isr); - return; - - case 32: /* fp fault */ - case 33: /* fp trap */ - result = handle_fpu_swa((vector == 32) ? 1 : 0, ®s, isr); - if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { - force_sig_fault(SIGFPE, FPE_FLTINV, (void __user *) iip, - 0, __ISR_VALID, isr); - } - return; - - case 34: - if (isr & 0x2) { - /* Lower-Privilege Transfer Trap */ - - /* If we disabled debug traps during an fsyscall, - * re-enable them here. - */ - if (test_thread_flag(TIF_DB_DISABLED)) { - clear_thread_flag(TIF_DB_DISABLED); - ia64_psr(®s)->db = 1; - } - - /* - * Just clear PSR.lp and then return immediately: - * all the interesting work (e.g., signal delivery) - * is done in the kernel exit path. - */ - ia64_psr(®s)->lp = 0; - return; - } else { - /* Unimplemented Instr. 
Address Trap */ - if (user_mode(®s)) { - force_sig_fault(SIGILL, ILL_BADIADDR, - (void __user *) iip, - 0, 0, 0); - return; - } - sprintf(buf, "Unimplemented Instruction Address fault"); - } - break; - - case 45: - printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); - printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", - iip, ifa, isr); - force_sig(SIGSEGV); - return; - - case 46: - printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); - printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", - iip, ifa, isr, iim); - force_sig(SIGSEGV); - return; - - case 47: - sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); - break; - - default: - sprintf(buf, "Fault %lu", vector); - break; - } - if (!die_if_kernel(buf, ®s, error)) - force_sig(SIGILL); -} diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c deleted file mode 100644 index 0acb5a0cd7ab..000000000000 --- a/arch/ia64/kernel/unaligned.c +++ /dev/null @@ -1,1560 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Architecture-specific unaligned trap handling. - * - * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger-Tang - * - * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix - * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame - * stacked register returns an undefined value; it does NOT trigger a - * "rsvd register fault"). - * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops. - * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes. - * 2001/01/17 Add support emulation of unaligned kernel accesses. - */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -extern int die_if_kernel(char *str, struct pt_regs *regs, long err); - -#undef DEBUG_UNALIGNED_TRAP - -#ifdef DEBUG_UNALIGNED_TRAP -# define DPRINT(a...) 
do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0) -# define DDUMP(str,vp,len) dump(str, vp, len) - -static void -dump (const char *str, void *vp, size_t len) -{ - unsigned char *cp = vp; - int i; - - printk("%s", str); - for (i = 0; i < len; ++i) - printk (" %02x", *cp++); - printk("\n"); -} -#else -# define DPRINT(a...) -# define DDUMP(str,vp,len) -#endif - -#define IA64_FIRST_STACKED_GR 32 -#define IA64_FIRST_ROTATING_FR 32 -#define SIGN_EXT9 0xffffffffffffff00ul - -/* - * sysctl settable hook which tells the kernel whether to honor the - * IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want - * to allow the super user to enable/disable this for security reasons - * (i.e. don't allow attacker to fill up logs with unaligned accesses). - */ -int no_unaligned_warning; -int unaligned_dump_stack; - -/* - * For M-unit: - * - * opcode | m | x6 | - * --------|------|---------| - * [40-37] | [36] | [35:30] | - * --------|------|---------| - * 4 | 1 | 6 | = 11 bits - * -------------------------- - * However bits [31:30] are not directly useful to distinguish between - * load/store so we can use [35:32] instead, which gives the following - * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer - * checking the m-bit until later in the load/store emulation. - */ -#define IA64_OPCODE_MASK 0x1ef -#define IA64_OPCODE_SHIFT 32 - -/* - * Table C-28 Integer Load/Store - * - * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF - * - * ld8.fill, st8.fill MUST be aligned because the RNATs are based on - * the address (bits [8:3]), so we must failed. 
- */ -#define LD_OP 0x080 -#define LDS_OP 0x081 -#define LDA_OP 0x082 -#define LDSA_OP 0x083 -#define LDBIAS_OP 0x084 -#define LDACQ_OP 0x085 -/* 0x086, 0x087 are not relevant */ -#define LDCCLR_OP 0x088 -#define LDCNC_OP 0x089 -#define LDCCLRACQ_OP 0x08a -#define ST_OP 0x08c -#define STREL_OP 0x08d -/* 0x08e,0x8f are not relevant */ - -/* - * Table C-29 Integer Load +Reg - * - * we use the ld->m (bit [36:36]) field to determine whether or not we have - * a load/store of this form. - */ - -/* - * Table C-30 Integer Load/Store +Imm - * - * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF - * - * ld8.fill, st8.fill must be aligned because the Nat register are based on - * the address, so we must fail and the program must be fixed. - */ -#define LD_IMM_OP 0x0a0 -#define LDS_IMM_OP 0x0a1 -#define LDA_IMM_OP 0x0a2 -#define LDSA_IMM_OP 0x0a3 -#define LDBIAS_IMM_OP 0x0a4 -#define LDACQ_IMM_OP 0x0a5 -/* 0x0a6, 0xa7 are not relevant */ -#define LDCCLR_IMM_OP 0x0a8 -#define LDCNC_IMM_OP 0x0a9 -#define LDCCLRACQ_IMM_OP 0x0aa -#define ST_IMM_OP 0x0ac -#define STREL_IMM_OP 0x0ad -/* 0x0ae,0xaf are not relevant */ - -/* - * Table C-32 Floating-point Load/Store - */ -#define LDF_OP 0x0c0 -#define LDFS_OP 0x0c1 -#define LDFA_OP 0x0c2 -#define LDFSA_OP 0x0c3 -/* 0x0c6 is irrelevant */ -#define LDFCCLR_OP 0x0c8 -#define LDFCNC_OP 0x0c9 -/* 0x0cb is irrelevant */ -#define STF_OP 0x0cc - -/* - * Table C-33 Floating-point Load +Reg - * - * we use the ld->m (bit [36:36]) field to determine whether or not we have - * a load/store of this form. 
- */ - -/* - * Table C-34 Floating-point Load/Store +Imm - */ -#define LDF_IMM_OP 0x0e0 -#define LDFS_IMM_OP 0x0e1 -#define LDFA_IMM_OP 0x0e2 -#define LDFSA_IMM_OP 0x0e3 -/* 0x0e6 is irrelevant */ -#define LDFCCLR_IMM_OP 0x0e8 -#define LDFCNC_IMM_OP 0x0e9 -#define STF_IMM_OP 0x0ec - -typedef struct { - unsigned long qp:6; /* [0:5] */ - unsigned long r1:7; /* [6:12] */ - unsigned long imm:7; /* [13:19] */ - unsigned long r3:7; /* [20:26] */ - unsigned long x:1; /* [27:27] */ - unsigned long hint:2; /* [28:29] */ - unsigned long x6_sz:2; /* [30:31] */ - unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */ - unsigned long m:1; /* [36:36] */ - unsigned long op:4; /* [37:40] */ - unsigned long pad:23; /* [41:63] */ -} load_store_t; - - -typedef enum { - UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */ - UPD_REG /* ldXZ r1=[r3],r2 */ -} update_t; - -/* - * We use tables to keep track of the offsets of registers in the saved state. - * This way we save having big switch/case statements. - * - * We use bit 0 to indicate switch_stack or pt_regs. - * The offset is simply shifted by 1 bit. - * A 2-byte value should be enough to hold any kind of offset - * - * In case the calling convention changes (and thus pt_regs/switch_stack) - * simply use RSW instead of RPT or vice-versa. 
- */ - -#define RPO(x) ((size_t) &((struct pt_regs *)0)->x) -#define RSO(x) ((size_t) &((struct switch_stack *)0)->x) - -#define RPT(x) (RPO(x) << 1) -#define RSW(x) (1| RSO(x)<<1) - -#define GR_OFFS(x) (gr_info[x]>>1) -#define GR_IN_SW(x) (gr_info[x] & 0x1) - -#define FR_OFFS(x) (fr_info[x]>>1) -#define FR_IN_SW(x) (fr_info[x] & 0x1) - -static u16 gr_info[32]={ - 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ - - RPT(r1), RPT(r2), RPT(r3), - - RSW(r4), RSW(r5), RSW(r6), RSW(r7), - - RPT(r8), RPT(r9), RPT(r10), RPT(r11), - RPT(r12), RPT(r13), RPT(r14), RPT(r15), - - RPT(r16), RPT(r17), RPT(r18), RPT(r19), - RPT(r20), RPT(r21), RPT(r22), RPT(r23), - RPT(r24), RPT(r25), RPT(r26), RPT(r27), - RPT(r28), RPT(r29), RPT(r30), RPT(r31) -}; - -static u16 fr_info[32]={ - 0, /* constant : WE SHOULD NEVER GET THIS */ - 0, /* constant : WE SHOULD NEVER GET THIS */ - - RSW(f2), RSW(f3), RSW(f4), RSW(f5), - - RPT(f6), RPT(f7), RPT(f8), RPT(f9), - RPT(f10), RPT(f11), - - RSW(f12), RSW(f13), RSW(f14), - RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19), - RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24), - RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29), - RSW(f30), RSW(f31) -}; - -/* Invalidate ALAT entry for integer register REGNO. 
*/ -static void -invala_gr (int regno) -{ -# define F(reg) case reg: ia64_invala_gr(reg); break - - switch (regno) { - F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); - F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); - F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); - F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); - F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); - F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); - F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); - F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); - F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); - F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); - F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); - F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); - F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); - F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); - F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); - F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); - } -# undef F -} - -/* Invalidate ALAT entry for floating-point register REGNO. 
*/ -static void -invala_fr (int regno) -{ -# define F(reg) case reg: ia64_invala_fr(reg); break - - switch (regno) { - F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); - F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); - F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); - F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); - F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); - F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); - F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); - F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); - F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); - F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); - F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); - F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); - F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); - F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); - F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); - F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); - } -# undef F -} - -static inline unsigned long -rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg) -{ - reg += rrb; - if (reg >= sor) - reg -= sor; - return reg; -} - -static void -set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) -{ - struct switch_stack *sw = (struct switch_stack *) regs - 1; - unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end; - unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; - unsigned long rnats, nat_mask; - unsigned long on_kbs; - long sof = (regs->cr_ifs) & 0x7f; - long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); - long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; - long ridx = r1 - 32; - - if (ridx >= sof) { - /* this should never happen, as the "rsvd register fault" has higher priority */ - DPRINT("ignoring write to r%lu; only %lu registers 
are allocated!\n", r1, sof); - return; - } - - if (ridx < sor) - ridx = rotate_reg(sor, rrb_gr, ridx); - - DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", - r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); - - on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); - addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); - if (addr >= kbs) { - /* the register is on the kernel backing store: easy... */ - rnat_addr = ia64_rse_rnat_addr(addr); - if ((unsigned long) rnat_addr >= sw->ar_bspstore) - rnat_addr = &sw->ar_rnat; - nat_mask = 1UL << ia64_rse_slot_num(addr); - - *addr = val; - if (nat) - *rnat_addr |= nat_mask; - else - *rnat_addr &= ~nat_mask; - return; - } - - if (!user_stack(current, regs)) { - DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1); - return; - } - - bspstore = (unsigned long *)regs->ar_bspstore; - ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); - bsp = ia64_rse_skip_regs(ubs_end, -sof); - addr = ia64_rse_skip_regs(bsp, ridx); - - DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); - - ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); - - rnat_addr = ia64_rse_rnat_addr(addr); - - ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); - DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n", - (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1); - - nat_mask = 1UL << ia64_rse_slot_num(addr); - if (nat) - rnats |= nat_mask; - else - rnats &= ~nat_mask; - ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats); - - DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats); -} - - -static void -get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat) -{ - struct switch_stack *sw = (struct switch_stack *) regs - 1; - unsigned long *bsp, *addr, *rnat_addr, *ubs_end, 
*bspstore; - unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; - unsigned long rnats, nat_mask; - unsigned long on_kbs; - long sof = (regs->cr_ifs) & 0x7f; - long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); - long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; - long ridx = r1 - 32; - - if (ridx >= sof) { - /* read of out-of-frame register returns an undefined value; 0 in our case. */ - DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof); - goto fail; - } - - if (ridx < sor) - ridx = rotate_reg(sor, rrb_gr, ridx); - - DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", - r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); - - on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); - addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); - if (addr >= kbs) { - /* the register is on the kernel backing store: easy... */ - *val = *addr; - if (nat) { - rnat_addr = ia64_rse_rnat_addr(addr); - if ((unsigned long) rnat_addr >= sw->ar_bspstore) - rnat_addr = &sw->ar_rnat; - nat_mask = 1UL << ia64_rse_slot_num(addr); - *nat = (*rnat_addr & nat_mask) != 0; - } - return; - } - - if (!user_stack(current, regs)) { - DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1); - goto fail; - } - - bspstore = (unsigned long *)regs->ar_bspstore; - ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); - bsp = ia64_rse_skip_regs(ubs_end, -sof); - addr = ia64_rse_skip_regs(bsp, ridx); - - DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); - - ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); - - if (nat) { - rnat_addr = ia64_rse_rnat_addr(addr); - nat_mask = 1UL << ia64_rse_slot_num(addr); - - DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats); - - ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); - *nat = (rnats & nat_mask) != 0; - } - return; - - fail: - *val = 0; - if (nat) - 
*nat = 0; - return; -} - - -static void -setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs) -{ - struct switch_stack *sw = (struct switch_stack *) regs - 1; - unsigned long addr; - unsigned long bitmask; - unsigned long *unat; - - /* - * First takes care of stacked registers - */ - if (regnum >= IA64_FIRST_STACKED_GR) { - set_rse_reg(regs, regnum, val, nat); - return; - } - - /* - * Using r0 as a target raises a General Exception fault which has higher priority - * than the Unaligned Reference fault. - */ - - /* - * Now look at registers in [0-31] range and init correct UNAT - */ - if (GR_IN_SW(regnum)) { - addr = (unsigned long)sw; - unat = &sw->ar_unat; - } else { - addr = (unsigned long)regs; - unat = &sw->caller_unat; - } - DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n", - addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum)); - /* - * add offset from base of struct - * and do it ! - */ - addr += GR_OFFS(regnum); - - *(unsigned long *)addr = val; - - /* - * We need to clear the corresponding UNAT bit to fully emulate the load - * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 - */ - bitmask = 1UL << (addr >> 3 & 0x3f); - DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat); - if (nat) { - *unat |= bitmask; - } else { - *unat &= ~bitmask; - } - DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat); -} - -/* - * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the - * range from 32-127, result is in the range from 0-95. 
- */ -static inline unsigned long -fph_index (struct pt_regs *regs, long regnum) -{ - unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; - return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); -} - -static void -setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) -{ - struct switch_stack *sw = (struct switch_stack *)regs - 1; - unsigned long addr; - - /* - * From EAS-2.5: FPDisableFault has higher priority than Unaligned - * Fault. Thus, when we get here, we know the partition is enabled. - * To update f32-f127, there are three choices: - * - * (1) save f32-f127 to thread.fph and update the values there - * (2) use a gigantic switch statement to directly access the registers - * (3) generate code on the fly to update the desired register - * - * For now, we are using approach (1). - */ - if (regnum >= IA64_FIRST_ROTATING_FR) { - ia64_sync_fph(current); - current->thread.fph[fph_index(regs, regnum)] = *fpval; - } else { - /* - * pt_regs or switch_stack ? - */ - if (FR_IN_SW(regnum)) { - addr = (unsigned long)sw; - } else { - addr = (unsigned long)regs; - } - - DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum)); - - addr += FR_OFFS(regnum); - *(struct ia64_fpreg *)addr = *fpval; - - /* - * mark the low partition as being used now - * - * It is highly unlikely that this bit is not already set, but - * let's do it for safety. 
- */ - regs->cr_ipsr |= IA64_PSR_MFL; - } -} - -/* - * Those 2 inline functions generate the spilled versions of the constant floating point - * registers which can be used with stfX - */ -static inline void -float_spill_f0 (struct ia64_fpreg *final) -{ - ia64_stf_spill(final, 0); -} - -static inline void -float_spill_f1 (struct ia64_fpreg *final) -{ - ia64_stf_spill(final, 1); -} - -static void -getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) -{ - struct switch_stack *sw = (struct switch_stack *) regs - 1; - unsigned long addr; - - /* - * From EAS-2.5: FPDisableFault has higher priority than - * Unaligned Fault. Thus, when we get here, we know the partition is - * enabled. - * - * When regnum > 31, the register is still live and we need to force a save - * to current->thread.fph to get access to it. See discussion in setfpreg() - * for reasons and other ways of doing this. - */ - if (regnum >= IA64_FIRST_ROTATING_FR) { - ia64_flush_fph(current); - *fpval = current->thread.fph[fph_index(regs, regnum)]; - } else { - /* - * f0 = 0.0, f1= 1.0. Those registers are constant and are thus - * not saved, we must generate their spilled form on the fly - */ - switch(regnum) { - case 0: - float_spill_f0(fpval); - break; - case 1: - float_spill_f1(fpval); - break; - default: - /* - * pt_regs or switch_stack ? - */ - addr = FR_IN_SW(regnum) ? 
(unsigned long)sw - : (unsigned long)regs; - - DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n", - FR_IN_SW(regnum), addr, FR_OFFS(regnum)); - - addr += FR_OFFS(regnum); - *fpval = *(struct ia64_fpreg *)addr; - } - } -} - - -static void -getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) -{ - struct switch_stack *sw = (struct switch_stack *) regs - 1; - unsigned long addr, *unat; - - if (regnum >= IA64_FIRST_STACKED_GR) { - get_rse_reg(regs, regnum, val, nat); - return; - } - - /* - * take care of r0 (read-only always evaluate to 0) - */ - if (regnum == 0) { - *val = 0; - if (nat) - *nat = 0; - return; - } - - /* - * Now look at registers in [0-31] range and init correct UNAT - */ - if (GR_IN_SW(regnum)) { - addr = (unsigned long)sw; - unat = &sw->ar_unat; - } else { - addr = (unsigned long)regs; - unat = &sw->caller_unat; - } - - DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)); - - addr += GR_OFFS(regnum); - - *val = *(unsigned long *)addr; - - /* - * do it only when requested - */ - if (nat) - *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL; -} - -static void -emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa) -{ - /* - * IMPORTANT: - * Given the way we handle unaligned speculative loads, we should - * not get to this point in the code but we keep this sanity check, - * just in case. 
- */ - if (ld.x6_op == 1 || ld.x6_op == 3) { - printk(KERN_ERR "%s: register update on speculative load, error\n", __func__); - if (die_if_kernel("unaligned reference on speculative load with register update\n", - regs, 30)) - return; - } - - - /* - * at this point, we know that the base register to update is valid i.e., - * it's not r0 - */ - if (type == UPD_IMMEDIATE) { - unsigned long imm; - - /* - * Load +Imm: ldXZ r1=[r3],imm(9) - * - * - * form imm9: [13:19] contain the first 7 bits - */ - imm = ld.x << 7 | ld.imm; - - /* - * sign extend (1+8bits) if m set - */ - if (ld.m) imm |= SIGN_EXT9; - - /* - * ifa == r3 and we know that the NaT bit on r3 was clear so - * we can directly use ifa. - */ - ifa += imm; - - setreg(ld.r3, ifa, 0, regs); - - DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa); - - } else if (ld.m) { - unsigned long r2; - int nat_r2; - - /* - * Load +Reg Opcode: ldXZ r1=[r3],r2 - * - * Note: that we update r3 even in the case of ldfX.a - * (where the load does not happen) - * - * The way the load algorithm works, we know that r3 does not - * have its NaT bit set (would have gotten NaT consumption - * before getting the unaligned fault). So we can use ifa - * which equals r3 at this point. - * - * IMPORTANT: - * The above statement holds ONLY because we know that we - * never reach this code when trying to do a ldX.s. 
- * If we ever make it to here on an ldfX.s then - */ - getreg(ld.imm, &r2, &nat_r2, regs); - - ifa += r2; - - /* - * propagate Nat r2 -> r3 - */ - setreg(ld.r3, ifa, nat_r2, regs); - - DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2); - } -} - -static int emulate_store(unsigned long ifa, void *val, int len, bool kernel_mode) -{ - if (kernel_mode) - return copy_to_kernel_nofault((void *)ifa, val, len); - - return copy_to_user((void __user *)ifa, val, len); -} - -static int emulate_load(void *val, unsigned long ifa, int len, bool kernel_mode) -{ - if (kernel_mode) - return copy_from_kernel_nofault(val, (void *)ifa, len); - - return copy_from_user(val, (void __user *)ifa, len); -} - -static int -emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs, - bool kernel_mode) -{ - unsigned int len = 1 << ld.x6_sz; - unsigned long val = 0; - - /* - * r0, as target, doesn't need to be checked because Illegal Instruction - * faults have higher priority than unaligned faults. - * - * r0 cannot be found as the base as it would never generate an - * unaligned reference. - */ - - /* - * ldX.a we will emulate load and also invalidate the ALAT entry. - * See comment below for explanation on how we handle ldX.a - */ - - if (len != 2 && len != 4 && len != 8) { - DPRINT("unknown size: x6=%d\n", ld.x6_sz); - return -1; - } - /* this assumes little-endian byte-order: */ - if (emulate_load(&val, ifa, len, kernel_mode)) - return -1; - setreg(ld.r1, val, 0, regs); - - /* - * check for updates on any kind of loads - */ - if (ld.op == 0x5 || ld.m) - emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); - - /* - * handling of various loads (based on EAS2.4): - * - * ldX.acq (ordered load): - * - acquire semantics would have been used, so force fence instead. - * - * ldX.c.clr (check load and clear): - * - if we get to this handler, it's because the entry was not in the ALAT. 
- * Therefore the operation reverts to a normal load - * - * ldX.c.nc (check load no clear): - * - same as previous one - * - * ldX.c.clr.acq (ordered check load and clear): - * - same as above for c.clr part. The load needs to have acquire semantics. So - * we use the fence semantics which is stronger and thus ensures correctness. - * - * ldX.a (advanced load): - * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the - * address doesn't match requested size alignment. This means that we would - * possibly need more than one load to get the result. - * - * The load part can be handled just like a normal load, however the difficult - * part is to get the right thing into the ALAT. The critical piece of information - * in the base address of the load & size. To do that, a ld.a must be executed, - * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now - * if we use the same target register, we will be okay for the check.a instruction. - * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry - * which would overlap within [r3,r3+X] (the size of the load was store in the - * ALAT). If such an entry is found the entry is invalidated. But this is not good - * enough, take the following example: - * r3=3 - * ld4.a r1=[r3] - * - * Could be emulated by doing: - * ld1.a r1=[r3],1 - * store to temporary; - * ld1.a r1=[r3],1 - * store & shift to temporary; - * ld1.a r1=[r3],1 - * store & shift to temporary; - * ld1.a r1=[r3] - * store & shift to temporary; - * r1=temporary - * - * So in this case, you would get the right value is r1 but the wrong info in - * the ALAT. Notice that you could do it in reverse to finish with address 3 - * but you would still get the size wrong. To get the size right, one needs to - * execute exactly the same kind of load. You could do it from a aligned - * temporary location, but you would get the address wrong. 
- * - * So no matter what, it is not possible to emulate an advanced load - * correctly. But is that really critical ? - * - * We will always convert ld.a into a normal load with ALAT invalidated. This - * will enable compiler to do optimization where certain code path after ld.a - * is not required to have ld.c/chk.a, e.g., code path with no intervening stores. - * - * If there is a store after the advanced load, one must either do a ld.c.* or - * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no - * entry found in ALAT), and that's perfectly ok because: - * - * - ld.c.*, if the entry is not present a normal load is executed - * - chk.a.*, if the entry is not present, execution jumps to recovery code - * - * In either case, the load can be potentially retried in another form. - * - * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick - * up a stale entry later). The register base update MUST also be performed. - */ - - /* - * when the load has the .acq completer then - * use ordering fence. - */ - if (ld.x6_op == 0x5 || ld.x6_op == 0xa) - mb(); - - /* - * invalidate ALAT entry in case of advanced load - */ - if (ld.x6_op == 0x2) - invala_gr(ld.r1); - - return 0; -} - -static int -emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs, - bool kernel_mode) -{ - unsigned long r2; - unsigned int len = 1 << ld.x6_sz; - - /* - * if we get to this handler, Nat bits on both r3 and r2 have already - * been checked. so we don't need to do it - * - * extract the value to be stored - */ - getreg(ld.imm, &r2, NULL, regs); - - /* - * we rely on the macros in unaligned.h for now i.e., - * we let the compiler figure out how to read memory gracefully. - * - * We need this switch/case because the way the inline function - * works. The code is optimized by the compiler and looks like - * a single switch/case. 
- */ - DPRINT("st%d [%lx]=%lx\n", len, ifa, r2); - - if (len != 2 && len != 4 && len != 8) { - DPRINT("unknown size: x6=%d\n", ld.x6_sz); - return -1; - } - - /* this assumes little-endian byte-order: */ - if (emulate_store(ifa, &r2, len, kernel_mode)) - return -1; - - /* - * stX [r3]=r2,imm(9) - * - * NOTE: - * ld.r3 can never be r0, because r0 would not generate an - * unaligned access. - */ - if (ld.op == 0x5) { - unsigned long imm; - - /* - * form imm9: [12:6] contain first 7bits - */ - imm = ld.x << 7 | ld.r1; - /* - * sign extend (8bits) if m set - */ - if (ld.m) imm |= SIGN_EXT9; - /* - * ifa == r3 (NaT is necessarily cleared) - */ - ifa += imm; - - DPRINT("imm=%lx r3=%lx\n", imm, ifa); - - setreg(ld.r3, ifa, 0, regs); - } - /* - * we don't have alat_invalidate_multiple() so we need - * to do the complete flush :-<< - */ - ia64_invala(); - - /* - * stX.rel: use fence instead of release - */ - if (ld.x6_op == 0xd) - mb(); - - return 0; -} - -/* - * floating point operations sizes in bytes - */ -static const unsigned char float_fsz[4]={ - 10, /* extended precision (e) */ - 8, /* integer (8) */ - 4, /* single precision (s) */ - 8 /* double precision (d) */ -}; - -static inline void -mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final) -{ - ia64_ldfe(6, init); - ia64_stop(); - ia64_stf_spill(final, 6); -} - -static inline void -mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final) -{ - ia64_ldf8(6, init); - ia64_stop(); - ia64_stf_spill(final, 6); -} - -static inline void -mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final) -{ - ia64_ldfs(6, init); - ia64_stop(); - ia64_stf_spill(final, 6); -} - -static inline void -mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final) -{ - ia64_ldfd(6, init); - ia64_stop(); - ia64_stf_spill(final, 6); -} - -static inline void -float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final) -{ - ia64_ldf_fill(6, init); - ia64_stop(); - ia64_stfe(final, 6); 
-} - -static inline void -float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final) -{ - ia64_ldf_fill(6, init); - ia64_stop(); - ia64_stf8(final, 6); -} - -static inline void -float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final) -{ - ia64_ldf_fill(6, init); - ia64_stop(); - ia64_stfs(final, 6); -} - -static inline void -float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final) -{ - ia64_ldf_fill(6, init); - ia64_stop(); - ia64_stfd(final, 6); -} - -static int -emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs, bool kernel_mode) -{ - struct ia64_fpreg fpr_init[2]; - struct ia64_fpreg fpr_final[2]; - unsigned long len = float_fsz[ld.x6_sz]; - - /* - * fr0 & fr1 don't need to be checked because Illegal Instruction faults have - * higher priority than unaligned faults. - * - * r0 cannot be found as the base as it would never generate an unaligned - * reference. - */ - - /* - * make sure we get clean buffers - */ - memset(&fpr_init, 0, sizeof(fpr_init)); - memset(&fpr_final, 0, sizeof(fpr_final)); - - /* - * ldfpX.a: we don't try to emulate anything but we must - * invalidate the ALAT entry and execute updates, if any. - */ - if (ld.x6_op != 0x2) { - /* - * This assumes little-endian byte-order. 
Note that there is no "ldfpe" - * instruction: - */ - if (emulate_load(&fpr_init[0], ifa, len, kernel_mode) - || emulate_load(&fpr_init[1], (ifa + len), len, kernel_mode)) - return -1; - - DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz); - DDUMP("frp_init =", &fpr_init, 2*len); - /* - * XXX fixme - * Could optimize inlines by using ldfpX & 2 spills - */ - switch( ld.x6_sz ) { - case 0: - mem2float_extended(&fpr_init[0], &fpr_final[0]); - mem2float_extended(&fpr_init[1], &fpr_final[1]); - break; - case 1: - mem2float_integer(&fpr_init[0], &fpr_final[0]); - mem2float_integer(&fpr_init[1], &fpr_final[1]); - break; - case 2: - mem2float_single(&fpr_init[0], &fpr_final[0]); - mem2float_single(&fpr_init[1], &fpr_final[1]); - break; - case 3: - mem2float_double(&fpr_init[0], &fpr_final[0]); - mem2float_double(&fpr_init[1], &fpr_final[1]); - break; - } - DDUMP("fpr_final =", &fpr_final, 2*len); - /* - * XXX fixme - * - * A possible optimization would be to drop fpr_final and directly - * use the storage from the saved context i.e., the actual final - * destination (pt_regs, switch_stack or thread structure). - */ - setfpreg(ld.r1, &fpr_final[0], regs); - setfpreg(ld.imm, &fpr_final[1], regs); - } - - /* - * Check for updates: only immediate updates are available for this - * instruction. - */ - if (ld.m) { - /* - * the immediate is implicit given the ldsz of the operation: - * single: 8 (2x4) and for all others it's 16 (2x8) - */ - ifa += len<<1; - - /* - * IMPORTANT: - * the fact that we force the NaT of r3 to zero is ONLY valid - * as long as we don't come here with a ldfpX.s. - * For this reason we keep this sanity check - */ - if (ld.x6_op == 1 || ld.x6_op == 3) - printk(KERN_ERR "%s: register update on speculative load pair, error\n", - __func__); - - setreg(ld.r3, ifa, 0, regs); - } - - /* - * Invalidate ALAT entries, if any, for both registers. 
- */ - if (ld.x6_op == 0x2) { - invala_fr(ld.r1); - invala_fr(ld.imm); - } - return 0; -} - - -static int -emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs, - bool kernel_mode) -{ - struct ia64_fpreg fpr_init; - struct ia64_fpreg fpr_final; - unsigned long len = float_fsz[ld.x6_sz]; - - /* - * fr0 & fr1 don't need to be checked because Illegal Instruction - * faults have higher priority than unaligned faults. - * - * r0 cannot be found as the base as it would never generate an - * unaligned reference. - */ - - /* - * make sure we get clean buffers - */ - memset(&fpr_init,0, sizeof(fpr_init)); - memset(&fpr_final,0, sizeof(fpr_final)); - - /* - * ldfX.a we don't try to emulate anything but we must - * invalidate the ALAT entry. - * See comments in ldX for descriptions on how the various loads are handled. - */ - if (ld.x6_op != 0x2) { - if (emulate_load(&fpr_init, ifa, len, kernel_mode)) - return -1; - - DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); - DDUMP("fpr_init =", &fpr_init, len); - /* - * we only do something for x6_op={0,8,9} - */ - switch( ld.x6_sz ) { - case 0: - mem2float_extended(&fpr_init, &fpr_final); - break; - case 1: - mem2float_integer(&fpr_init, &fpr_final); - break; - case 2: - mem2float_single(&fpr_init, &fpr_final); - break; - case 3: - mem2float_double(&fpr_init, &fpr_final); - break; - } - DDUMP("fpr_final =", &fpr_final, len); - /* - * XXX fixme - * - * A possible optimization would be to drop fpr_final and directly - * use the storage from the saved context i.e., the actual final - * destination (pt_regs, switch_stack or thread structure). - */ - setfpreg(ld.r1, &fpr_final, regs); - } - - /* - * check for updates on any loads - */ - if (ld.op == 0x7 || ld.m) - emulate_load_updates(ld.op == 0x7 ? 
UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); - - /* - * invalidate ALAT entry in case of advanced floating point loads - */ - if (ld.x6_op == 0x2) - invala_fr(ld.r1); - - return 0; -} - - -static int -emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs, - bool kernel_mode) -{ - struct ia64_fpreg fpr_init; - struct ia64_fpreg fpr_final; - unsigned long len = float_fsz[ld.x6_sz]; - - /* - * make sure we get clean buffers - */ - memset(&fpr_init,0, sizeof(fpr_init)); - memset(&fpr_final,0, sizeof(fpr_final)); - - /* - * if we get to this handler, Nat bits on both r3 and r2 have already - * been checked. so we don't need to do it - * - * extract the value to be stored - */ - getfpreg(ld.imm, &fpr_init, regs); - /* - * during this step, we extract the spilled registers from the saved - * context i.e., we refill. Then we store (no spill) to temporary - * aligned location - */ - switch( ld.x6_sz ) { - case 0: - float2mem_extended(&fpr_init, &fpr_final); - break; - case 1: - float2mem_integer(&fpr_init, &fpr_final); - break; - case 2: - float2mem_single(&fpr_init, &fpr_final); - break; - case 3: - float2mem_double(&fpr_init, &fpr_final); - break; - } - DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); - DDUMP("fpr_init =", &fpr_init, len); - DDUMP("fpr_final =", &fpr_final, len); - - if (emulate_store(ifa, &fpr_final, len, kernel_mode)) - return -1; - - /* - * stfX [r3]=r2,imm(9) - * - * NOTE: - * ld.r3 can never be r0, because r0 would not generate an - * unaligned access. 
- */ - if (ld.op == 0x7) { - unsigned long imm; - - /* - * form imm9: [12:6] contain first 7bits - */ - imm = ld.x << 7 | ld.r1; - /* - * sign extend (8bits) if m set - */ - if (ld.m) - imm |= SIGN_EXT9; - /* - * ifa == r3 (NaT is necessarily cleared) - */ - ifa += imm; - - DPRINT("imm=%lx r3=%lx\n", imm, ifa); - - setreg(ld.r3, ifa, 0, regs); - } - /* - * we don't have alat_invalidate_multiple() so we need - * to do the complete flush :-<< - */ - ia64_invala(); - - return 0; -} - -/* - * Make sure we log the unaligned access, so that user/sysadmin can notice it and - * eventually fix the program. However, we don't want to do that for every access so we - * pace it with jiffies. - */ -static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5); - -void -ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) -{ - struct ia64_psr *ipsr = ia64_psr(regs); - unsigned long bundle[2]; - unsigned long opcode; - const struct exception_table_entry *eh = NULL; - union { - unsigned long l; - load_store_t insn; - } u; - int ret = -1; - bool kernel_mode = false; - - if (ia64_psr(regs)->be) { - /* we don't support big-endian accesses */ - if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0)) - return; - goto force_sigbus; - } - - /* - * Treat kernel accesses for which there is an exception handler entry the same as - * user-level unaligned accesses. Otherwise, a clever program could trick this - * handler into reading an arbitrary kernel addresses... - */ - if (!user_mode(regs)) - eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri); - if (user_mode(regs) || eh) { - if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0) - goto force_sigbus; - - if (!no_unaligned_warning && - !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) && - __ratelimit(&logging_rate_limit)) - { - char buf[200]; /* comm[] is at most 16 bytes... 
*/ - size_t len; - - len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, " - "ip=0x%016lx\n\r", current->comm, - task_pid_nr(current), - ifa, regs->cr_iip + ipsr->ri); - /* - * Don't call tty_write_message() if we're in the kernel; we might - * be holding locks... - */ - if (user_mode(regs)) { - struct tty_struct *tty = get_current_tty(); - tty_write_message(tty, buf); - tty_kref_put(tty); - } - buf[len-1] = '\0'; /* drop '\r' */ - /* watch for command names containing %s */ - printk(KERN_WARNING "%s", buf); - } else { - if (no_unaligned_warning) { - printk_once(KERN_WARNING "%s(%d) encountered an " - "unaligned exception which required\n" - "kernel assistance, which degrades " - "the performance of the application.\n" - "Unaligned exception warnings have " - "been disabled by the system " - "administrator\n" - "echo 0 > /proc/sys/kernel/ignore-" - "unaligned-usertrap to re-enable\n", - current->comm, task_pid_nr(current)); - } - } - } else { - if (__ratelimit(&logging_rate_limit)) { - printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n", - ifa, regs->cr_iip + ipsr->ri); - if (unaligned_dump_stack) - dump_stack(); - } - kernel_mode = true; - } - - DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n", - regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it); - - if (emulate_load(bundle, regs->cr_iip, 16, kernel_mode)) - goto failure; - - /* - * extract the instruction from the bundle given the slot number - */ - switch (ipsr->ri) { - default: - case 0: u.l = (bundle[0] >> 5); break; - case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break; - case 2: u.l = (bundle[1] >> 23); break; - } - opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK; - - DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d " - "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm, - u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op); - - /* - * IMPORTANT: - * Notice that the switch statement DOES not cover 
all possible instructions - * that DO generate unaligned references. This is made on purpose because for some - * instructions it DOES NOT make sense to try and emulate the access. Sometimes it - * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e., - * the program will get a signal and die: - * - * load/store: - * - ldX.spill - * - stX.spill - * Reason: RNATs are based on addresses - * - ld16 - * - st16 - * Reason: ld16 and st16 are supposed to occur in a single - * memory op - * - * synchronization: - * - cmpxchg - * - fetchadd - * - xchg - * Reason: ATOMIC operations cannot be emulated properly using multiple - * instructions. - * - * speculative loads: - * - ldX.sZ - * Reason: side effects, code must be ready to deal with failure so simpler - * to let the load fail. - * --------------------------------------------------------------------------------- - * XXX fixme - * - * I would like to get rid of this switch case and do something - * more elegant. - */ - switch (opcode) { - case LDS_OP: - case LDSA_OP: - if (u.insn.x) - /* oops, really a semaphore op (cmpxchg, etc) */ - goto failure; - fallthrough; - case LDS_IMM_OP: - case LDSA_IMM_OP: - case LDFS_OP: - case LDFSA_OP: - case LDFS_IMM_OP: - /* - * The instruction will be retried with deferred exceptions turned on, and - * we should get Nat bit installed - * - * IMPORTANT: When PSR_ED is set, the register & immediate update forms - * are actually executed even though the operation failed. So we don't - * need to take care of this. 
- */ - DPRINT("forcing PSR_ED\n"); - regs->cr_ipsr |= IA64_PSR_ED; - goto done; - - case LD_OP: - case LDA_OP: - case LDBIAS_OP: - case LDACQ_OP: - case LDCCLR_OP: - case LDCNC_OP: - case LDCCLRACQ_OP: - if (u.insn.x) - /* oops, really a semaphore op (cmpxchg, etc) */ - goto failure; - fallthrough; - case LD_IMM_OP: - case LDA_IMM_OP: - case LDBIAS_IMM_OP: - case LDACQ_IMM_OP: - case LDCCLR_IMM_OP: - case LDCNC_IMM_OP: - case LDCCLRACQ_IMM_OP: - ret = emulate_load_int(ifa, u.insn, regs, kernel_mode); - break; - - case ST_OP: - case STREL_OP: - if (u.insn.x) - /* oops, really a semaphore op (cmpxchg, etc) */ - goto failure; - fallthrough; - case ST_IMM_OP: - case STREL_IMM_OP: - ret = emulate_store_int(ifa, u.insn, regs, kernel_mode); - break; - - case LDF_OP: - case LDFA_OP: - case LDFCCLR_OP: - case LDFCNC_OP: - if (u.insn.x) - ret = emulate_load_floatpair(ifa, u.insn, regs, kernel_mode); - else - ret = emulate_load_float(ifa, u.insn, regs, kernel_mode); - break; - - case LDF_IMM_OP: - case LDFA_IMM_OP: - case LDFCCLR_IMM_OP: - case LDFCNC_IMM_OP: - ret = emulate_load_float(ifa, u.insn, regs, kernel_mode); - break; - - case STF_OP: - case STF_IMM_OP: - ret = emulate_store_float(ifa, u.insn, regs, kernel_mode); - break; - - default: - goto failure; - } - DPRINT("ret=%d\n", ret); - if (ret) - goto failure; - - if (ipsr->ri == 2) - /* - * given today's architecture this case is not likely to happen because a - * memory access instruction (M) can never be in the last slot of a - * bundle. But let's keep it for now. - */ - regs->cr_iip += 16; - ipsr->ri = (ipsr->ri + 1) & 0x3; - - DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip); - done: - return; - - failure: - /* something went wrong... 
*/ - if (!user_mode(regs)) { - if (eh) { - ia64_handle_exception(regs, eh); - goto done; - } - if (die_if_kernel("error during unaligned kernel access\n", regs, ret)) - return; - /* NOT_REACHED */ - } - force_sigbus: - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) ifa, - 0, 0, 0); - goto done; -} diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c deleted file mode 100644 index a0fec82c56b8..000000000000 --- a/arch/ia64/kernel/uncached.c +++ /dev/null @@ -1,273 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2001-2008 Silicon Graphics, Inc. All rights reserved. - * - * A simple uncached page allocator using the generic allocator. This - * allocator first utilizes the spare (spill) pages found in the EFI - * memmap and will then start converting cached pages to uncached ones - * at a granule at a time. Node awareness is implemented by having a - * pool of pages per node. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct uncached_pool { - struct gen_pool *pool; - struct mutex add_chunk_mutex; /* serialize adding a converted chunk */ - int nchunks_added; /* #of converted chunks added to pool */ - atomic_t status; /* smp called function's return status*/ -}; - -#define MAX_CONVERTED_CHUNKS_PER_NODE 2 - -struct uncached_pool uncached_pools[MAX_NUMNODES]; - - -static void uncached_ipi_visibility(void *data) -{ - int status; - struct uncached_pool *uc_pool = (struct uncached_pool *)data; - - status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); - if ((status != PAL_VISIBILITY_OK) && - (status != PAL_VISIBILITY_OK_REMOTE_NEEDED)) - atomic_inc(&uc_pool->status); -} - - -static void uncached_ipi_mc_drain(void *data) -{ - int status; - struct uncached_pool *uc_pool = (struct uncached_pool *)data; - - status = ia64_pal_mc_drain(); - if (status != PAL_STATUS_SUCCESS) - 
atomic_inc(&uc_pool->status); -} - - -/* - * Add a new chunk of uncached memory pages to the specified pool. - * - * @pool: pool to add new chunk of uncached memory to - * @nid: node id of node to allocate memory from, or -1 - * - * This is accomplished by first allocating a granule of cached memory pages - * and then converting them to uncached memory pages. - */ -static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) -{ - struct page *page; - int status, i, nchunks_added = uc_pool->nchunks_added; - unsigned long c_addr, uc_addr; - - if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0) - return -1; /* interrupted by a signal */ - - if (uc_pool->nchunks_added > nchunks_added) { - /* someone added a new chunk while we were waiting */ - mutex_unlock(&uc_pool->add_chunk_mutex); - return 0; - } - - if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) { - mutex_unlock(&uc_pool->add_chunk_mutex); - return -1; - } - - /* attempt to allocate a granule's worth of cached memory pages */ - - page = __alloc_pages_node(nid, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, - IA64_GRANULE_SHIFT-PAGE_SHIFT); - if (!page) { - mutex_unlock(&uc_pool->add_chunk_mutex); - return -1; - } - - /* convert the memory pages from cached to uncached */ - - c_addr = (unsigned long)page_address(page); - uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; - - /* - * There's a small race here where it's possible for someone to - * access the page through /dev/mem halfway through the conversion - * to uncached - not sure it's really worth bothering about - */ - for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) - SetPageUncached(&page[i]); - - flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE); - - status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); - if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) { - atomic_set(&uc_pool->status, 0); - smp_call_function(uncached_ipi_visibility, uc_pool, 1); - if (atomic_read(&uc_pool->status)) - goto failed; 
- } else if (status != PAL_VISIBILITY_OK) - goto failed; - - preempt_disable(); - - flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE); - - /* flush the just introduced uncached translation from the TLB */ - local_flush_tlb_all(); - - preempt_enable(); - - status = ia64_pal_mc_drain(); - if (status != PAL_STATUS_SUCCESS) - goto failed; - atomic_set(&uc_pool->status, 0); - smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); - if (atomic_read(&uc_pool->status)) - goto failed; - - /* - * The chunk of memory pages has been converted to uncached so now we - * can add it to the pool. - */ - status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid); - if (status) - goto failed; - - uc_pool->nchunks_added++; - mutex_unlock(&uc_pool->add_chunk_mutex); - return 0; - - /* failed to convert or add the chunk so give it back to the kernel */ -failed: - for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) - ClearPageUncached(&page[i]); - - free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT); - mutex_unlock(&uc_pool->add_chunk_mutex); - return -1; -} - - -/* - * uncached_alloc_page - * - * @starting_nid: node id of node to start with, or -1 - * @n_pages: number of contiguous pages to allocate - * - * Allocate the specified number of contiguous uncached pages on the - * requested node. If not enough contiguous uncached pages are available - * on the requested node, roundrobin starting with the next higher node. 
- */ -unsigned long uncached_alloc_page(int starting_nid, int n_pages) -{ - unsigned long uc_addr; - struct uncached_pool *uc_pool; - int nid; - - if (unlikely(starting_nid >= MAX_NUMNODES)) - return 0; - - if (starting_nid < 0) - starting_nid = numa_node_id(); - nid = starting_nid; - - do { - if (!node_state(nid, N_HIGH_MEMORY)) - continue; - uc_pool = &uncached_pools[nid]; - if (uc_pool->pool == NULL) - continue; - do { - uc_addr = gen_pool_alloc(uc_pool->pool, - n_pages * PAGE_SIZE); - if (uc_addr != 0) - return uc_addr; - } while (uncached_add_chunk(uc_pool, nid) == 0); - - } while ((nid = (nid + 1) % MAX_NUMNODES) != starting_nid); - - return 0; -} -EXPORT_SYMBOL(uncached_alloc_page); - - -/* - * uncached_free_page - * - * @uc_addr: uncached address of first page to free - * @n_pages: number of contiguous pages to free - * - * Free the specified number of uncached pages. - */ -void uncached_free_page(unsigned long uc_addr, int n_pages) -{ - int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET); - struct gen_pool *pool = uncached_pools[nid].pool; - - if (unlikely(pool == NULL)) - return; - - if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) - panic("uncached_free_page invalid address %lx\n", uc_addr); - - gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE); -} -EXPORT_SYMBOL(uncached_free_page); - - -/* - * uncached_build_memmap, - * - * @uc_start: uncached starting address of a chunk of uncached memory - * @uc_end: uncached ending address of a chunk of uncached memory - * @arg: ignored, (NULL argument passed in on call to efi_memmap_walk_uc()) - * - * Called at boot time to build a map of pages that can be used for - * memory special operations. 
- */ -static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg) -{ - int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET); - struct gen_pool *pool = uncached_pools[nid].pool; - size_t size = uc_end - uc_start; - - touch_softlockup_watchdog(); - - if (pool != NULL) { - memset((char *)uc_start, 0, size); - (void) gen_pool_add(pool, uc_start, size, nid); - } - return 0; -} - - -static int __init uncached_init(void) -{ - int nid; - - for_each_online_node(nid) { - uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid); - mutex_init(&uncached_pools[nid].add_chunk_mutex); - } - - efi_memmap_walk_uc(uncached_build_memmap, NULL); - return 0; -} - -__initcall(uncached_init); diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c deleted file mode 100644 index 6bd64c35e691..000000000000 --- a/arch/ia64/kernel/unwind.c +++ /dev/null @@ -1,2320 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 1999-2004 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 2003 Fenghua Yu - * - Change pt_regs_off() to make it less dependent on pt_regs structure. - */ -/* - * This file implements call frame unwind support for the Linux - * kernel. Parsing and processing the unwind information is - * time-consuming, so this implementation translates the unwind - * descriptors into unwind scripts. These scripts are very simple - * (basically a sequence of assignments) and efficient to execute. - * They are cached for later re-use. Each script is specific for a - * given instruction pointer address and the set of predicate values - * that the script depends on (most unwind descriptors are - * unconditional and scripts often do not depend on predicates at - * all). This code is based on the unwind conventions described in - * the "IA-64 Software Conventions and Runtime Architecture" manual. 
- * - * SMP conventions: - * o updates to the global unwind data (in structure "unw") are serialized - * by the unw.lock spinlock - * o each unwind script has its own read-write lock; a thread must acquire - * a read lock before executing a script and must acquire a write lock - * before modifying a script - * o if both the unw.lock spinlock and a script's read-write lock must be - * acquired, then the read-write lock must be acquired first. - */ -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "entry.h" -#include "unwind_i.h" - -#define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */ -#define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE) - -#define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1) -#define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE) - -#define UNW_STATS 0 /* WARNING: this disabled interrupts for long time-spans!! */ - -#ifdef UNW_DEBUG - static unsigned int unw_debug_level = UNW_DEBUG; -# define UNW_DEBUG_ON(n) unw_debug_level >= n - /* Do not code a printk level, not all debug lines end in newline */ -# define UNW_DPRINT(n, ...) if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__) -# undef inline -# define inline -#else /* !UNW_DEBUG */ -# define UNW_DEBUG_ON(n) 0 -# define UNW_DPRINT(n, ...) -#endif /* UNW_DEBUG */ - -#if UNW_STATS -# define STAT(x...) x -#else -# define STAT(x...) 
-#endif - -#define alloc_reg_state() kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC) -#define free_reg_state(usr) kfree(usr) -#define alloc_labeled_state() kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC) -#define free_labeled_state(usr) kfree(usr) - -typedef unsigned long unw_word; -typedef unsigned char unw_hash_index_t; - -static struct { - spinlock_t lock; /* spinlock for unwind data */ - - /* list of unwind tables (one per load-module) */ - struct unw_table *tables; - - unsigned long r0; /* constant 0 for r0 */ - - /* table of registers that prologues can save (and order in which they're saved): */ - const unsigned char save_order[8]; - - /* maps a preserved register index (preg_index) to corresponding switch_stack offset: */ - unsigned short sw_off[sizeof(struct unw_frame_info) / 8]; - - unsigned short lru_head; /* index of lead-recently used script */ - unsigned short lru_tail; /* index of most-recently used script */ - - /* index into unw_frame_info for preserved register i */ - unsigned short preg_index[UNW_NUM_REGS]; - - short pt_regs_offsets[32]; - - /* unwind table for the kernel: */ - struct unw_table kernel_table; - - /* unwind table describing the gate page (kernel code that is mapped into user space): */ - size_t gate_table_size; - unsigned long *gate_table; - - /* hash table that maps instruction pointer to script index: */ - unsigned short hash[UNW_HASH_SIZE]; - - /* script cache: */ - struct unw_script cache[UNW_CACHE_SIZE]; - -# ifdef UNW_DEBUG - const char *preg_name[UNW_NUM_REGS]; -# endif -# if UNW_STATS - struct { - struct { - int lookups; - int hinted_hits; - int normal_hits; - int collision_chain_traversals; - } cache; - struct { - unsigned long build_time; - unsigned long run_time; - unsigned long parse_time; - int builds; - int news; - int collisions; - int runs; - } script; - struct { - unsigned long init_time; - unsigned long unwind_time; - int inits; - int unwinds; - } api; - } stat; -# endif -} unw = { - .tables = 
&unw.kernel_table, - .lock = __SPIN_LOCK_UNLOCKED(unw.lock), - .save_order = { - UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR, - UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR - }, - .preg_index = { - offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_GR */ - offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_MEM */ - offsetof(struct unw_frame_info, bsp_loc)/8, - offsetof(struct unw_frame_info, bspstore_loc)/8, - offsetof(struct unw_frame_info, pfs_loc)/8, - offsetof(struct unw_frame_info, rnat_loc)/8, - offsetof(struct unw_frame_info, psp)/8, - offsetof(struct unw_frame_info, rp_loc)/8, - offsetof(struct unw_frame_info, r4)/8, - offsetof(struct unw_frame_info, r5)/8, - offsetof(struct unw_frame_info, r6)/8, - offsetof(struct unw_frame_info, r7)/8, - offsetof(struct unw_frame_info, unat_loc)/8, - offsetof(struct unw_frame_info, pr_loc)/8, - offsetof(struct unw_frame_info, lc_loc)/8, - offsetof(struct unw_frame_info, fpsr_loc)/8, - offsetof(struct unw_frame_info, b1_loc)/8, - offsetof(struct unw_frame_info, b2_loc)/8, - offsetof(struct unw_frame_info, b3_loc)/8, - offsetof(struct unw_frame_info, b4_loc)/8, - offsetof(struct unw_frame_info, b5_loc)/8, - offsetof(struct unw_frame_info, f2_loc)/8, - offsetof(struct unw_frame_info, f3_loc)/8, - offsetof(struct unw_frame_info, f4_loc)/8, - offsetof(struct unw_frame_info, f5_loc)/8, - offsetof(struct unw_frame_info, fr_loc[16 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[17 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[18 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[19 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[20 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[21 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[22 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[23 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[24 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[25 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[26 - 16])/8, - offsetof(struct 
unw_frame_info, fr_loc[27 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[28 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[29 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[30 - 16])/8, - offsetof(struct unw_frame_info, fr_loc[31 - 16])/8, - }, - .pt_regs_offsets = { - [0] = -1, - offsetof(struct pt_regs, r1), - offsetof(struct pt_regs, r2), - offsetof(struct pt_regs, r3), - [4] = -1, [5] = -1, [6] = -1, [7] = -1, - offsetof(struct pt_regs, r8), - offsetof(struct pt_regs, r9), - offsetof(struct pt_regs, r10), - offsetof(struct pt_regs, r11), - offsetof(struct pt_regs, r12), - offsetof(struct pt_regs, r13), - offsetof(struct pt_regs, r14), - offsetof(struct pt_regs, r15), - offsetof(struct pt_regs, r16), - offsetof(struct pt_regs, r17), - offsetof(struct pt_regs, r18), - offsetof(struct pt_regs, r19), - offsetof(struct pt_regs, r20), - offsetof(struct pt_regs, r21), - offsetof(struct pt_regs, r22), - offsetof(struct pt_regs, r23), - offsetof(struct pt_regs, r24), - offsetof(struct pt_regs, r25), - offsetof(struct pt_regs, r26), - offsetof(struct pt_regs, r27), - offsetof(struct pt_regs, r28), - offsetof(struct pt_regs, r29), - offsetof(struct pt_regs, r30), - offsetof(struct pt_regs, r31), - }, - .hash = { [0 ... UNW_HASH_SIZE - 1] = -1 }, -#ifdef UNW_DEBUG - .preg_name = { - "pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", "ar.rnat", "psp", "rp", - "r4", "r5", "r6", "r7", - "ar.unat", "pr", "ar.lc", "ar.fpsr", - "b1", "b2", "b3", "b4", "b5", - "f2", "f3", "f4", "f5", - "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", - "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" - } -#endif -}; - -static inline int -read_only (void *addr) -{ - return (unsigned long) ((char *) addr - (char *) &unw.r0) < sizeof(unw.r0); -} - -/* - * Returns offset of rREG in struct pt_regs. 
- */ -static inline unsigned long -pt_regs_off (unsigned long reg) -{ - short off = -1; - - if (reg < ARRAY_SIZE(unw.pt_regs_offsets)) - off = unw.pt_regs_offsets[reg]; - - if (off < 0) { - UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __func__, reg); - off = 0; - } - return (unsigned long) off; -} - -static inline struct pt_regs * -get_scratch_regs (struct unw_frame_info *info) -{ - if (!info->pt) { - /* This should not happen with valid unwind info. */ - UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __func__); - if (info->flags & UNW_FLAG_INTERRUPT_FRAME) - info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1); - else - info->pt = info->sp - 16; - } - UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __func__, info->sp, info->pt); - return (struct pt_regs *) info->pt; -} - -/* Unwind accessors. */ - -int -unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write) -{ - unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat; - struct unw_ireg *ireg; - struct pt_regs *pt; - - if ((unsigned) regnum - 1 >= 127) { - if (regnum == 0 && !write) { - *val = 0; /* read r0 always returns 0 */ - *nat = 0; - return 0; - } - UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n", - __func__, regnum); - return -1; - } - - if (regnum < 32) { - if (regnum >= 4 && regnum <= 7) { - /* access a preserved register */ - ireg = &info->r4 + (regnum - 4); - addr = ireg->loc; - if (addr) { - nat_addr = addr + ireg->nat.off; - switch (ireg->nat.type) { - case UNW_NAT_VAL: - /* simulate getf.sig/setf.sig */ - if (write) { - if (*nat) { - /* write NaTVal and be done with it */ - addr[0] = 0; - addr[1] = 0x1fffe; - return 0; - } - addr[1] = 0x1003e; - } else { - if (addr[0] == 0 && addr[1] == 0x1ffe) { - /* return NaT and be done with it */ - *val = 0; - *nat = 1; - return 0; - } - } - fallthrough; - case UNW_NAT_NONE: - dummy_nat = 0; - nat_addr = &dummy_nat; - break; - - case UNW_NAT_MEMSTK: - nat_mask = (1UL << 
((long) addr & 0x1f8)/8); - break; - - case UNW_NAT_REGSTK: - nat_addr = ia64_rse_rnat_addr(addr); - if ((unsigned long) addr < info->regstk.limit - || (unsigned long) addr >= info->regstk.top) - { - UNW_DPRINT(0, "unwind.%s: %p outside of regstk " - "[0x%lx-0x%lx)\n", - __func__, (void *) addr, - info->regstk.limit, - info->regstk.top); - return -1; - } - if ((unsigned long) nat_addr >= info->regstk.top) - nat_addr = &info->sw->ar_rnat; - nat_mask = (1UL << ia64_rse_slot_num(addr)); - break; - } - } else { - addr = &info->sw->r4 + (regnum - 4); - nat_addr = &info->sw->ar_unat; - nat_mask = (1UL << ((long) addr & 0x1f8)/8); - } - } else { - /* access a scratch register */ - pt = get_scratch_regs(info); - addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum)); - if (info->pri_unat_loc) - nat_addr = info->pri_unat_loc; - else - nat_addr = &info->sw->caller_unat; - nat_mask = (1UL << ((long) addr & 0x1f8)/8); - } - } else { - /* access a stacked register */ - addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32); - nat_addr = ia64_rse_rnat_addr(addr); - if ((unsigned long) addr < info->regstk.limit - || (unsigned long) addr >= info->regstk.top) - { - UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside " - "of rbs\n", __func__); - return -1; - } - if ((unsigned long) nat_addr >= info->regstk.top) - nat_addr = &info->sw->ar_rnat; - nat_mask = (1UL << ia64_rse_slot_num(addr)); - } - - if (write) { - if (read_only(addr)) { - UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", - __func__); - } else { - *addr = *val; - if (*nat) - *nat_addr |= nat_mask; - else - *nat_addr &= ~nat_mask; - } - } else { - if ((*nat_addr & nat_mask) == 0) { - *val = *addr; - *nat = 0; - } else { - *val = 0; /* if register is a NaT, *addr may contain kernel data! 
*/ - *nat = 1; - } - } - return 0; -} -EXPORT_SYMBOL(unw_access_gr); - -int -unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write) -{ - unsigned long *addr; - struct pt_regs *pt; - - switch (regnum) { - /* scratch: */ - case 0: pt = get_scratch_regs(info); addr = &pt->b0; break; - case 6: pt = get_scratch_regs(info); addr = &pt->b6; break; - case 7: pt = get_scratch_regs(info); addr = &pt->b7; break; - - /* preserved: */ - case 1: case 2: case 3: case 4: case 5: - addr = *(&info->b1_loc + (regnum - 1)); - if (!addr) - addr = &info->sw->b1 + (regnum - 1); - break; - - default: - UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n", - __func__, regnum); - return -1; - } - if (write) - if (read_only(addr)) { - UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", - __func__); - } else - *addr = *val; - else - *val = *addr; - return 0; -} -EXPORT_SYMBOL(unw_access_br); - -int -unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write) -{ - struct ia64_fpreg *addr = NULL; - struct pt_regs *pt; - - if ((unsigned) (regnum - 2) >= 126) { - UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n", - __func__, regnum); - return -1; - } - - if (regnum <= 5) { - addr = *(&info->f2_loc + (regnum - 2)); - if (!addr) - addr = &info->sw->f2 + (regnum - 2); - } else if (regnum <= 15) { - if (regnum <= 11) { - pt = get_scratch_regs(info); - addr = &pt->f6 + (regnum - 6); - } - else - addr = &info->sw->f12 + (regnum - 12); - } else if (regnum <= 31) { - addr = info->fr_loc[regnum - 16]; - if (!addr) - addr = &info->sw->f16 + (regnum - 16); - } else { - struct task_struct *t = info->task; - - if (write) - ia64_sync_fph(t); - else - ia64_flush_fph(t); - addr = t->thread.fph + (regnum - 32); - } - - if (write) - if (read_only(addr)) { - UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", - __func__); - } else - *addr = *val; - else - *val = *addr; - return 0; -} 
-EXPORT_SYMBOL(unw_access_fr); - -int -unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write) -{ - unsigned long *addr; - struct pt_regs *pt; - - switch (regnum) { - case UNW_AR_BSP: - addr = info->bsp_loc; - if (!addr) - addr = &info->sw->ar_bspstore; - break; - - case UNW_AR_BSPSTORE: - addr = info->bspstore_loc; - if (!addr) - addr = &info->sw->ar_bspstore; - break; - - case UNW_AR_PFS: - addr = info->pfs_loc; - if (!addr) - addr = &info->sw->ar_pfs; - break; - - case UNW_AR_RNAT: - addr = info->rnat_loc; - if (!addr) - addr = &info->sw->ar_rnat; - break; - - case UNW_AR_UNAT: - addr = info->unat_loc; - if (!addr) - addr = &info->sw->caller_unat; - break; - - case UNW_AR_LC: - addr = info->lc_loc; - if (!addr) - addr = &info->sw->ar_lc; - break; - - case UNW_AR_EC: - if (!info->cfm_loc) - return -1; - if (write) - *info->cfm_loc = - (*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52); - else - *val = (*info->cfm_loc >> 52) & 0x3f; - return 0; - - case UNW_AR_FPSR: - addr = info->fpsr_loc; - if (!addr) - addr = &info->sw->ar_fpsr; - break; - - case UNW_AR_RSC: - pt = get_scratch_regs(info); - addr = &pt->ar_rsc; - break; - - case UNW_AR_CCV: - pt = get_scratch_regs(info); - addr = &pt->ar_ccv; - break; - - case UNW_AR_CSD: - pt = get_scratch_regs(info); - addr = &pt->ar_csd; - break; - - case UNW_AR_SSD: - pt = get_scratch_regs(info); - addr = &pt->ar_ssd; - break; - - default: - UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n", - __func__, regnum); - return -1; - } - - if (write) { - if (read_only(addr)) { - UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", - __func__); - } else - *addr = *val; - } else - *val = *addr; - return 0; -} -EXPORT_SYMBOL(unw_access_ar); - -int -unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write) -{ - unsigned long *addr; - - addr = info->pr_loc; - if (!addr) - addr = &info->sw->pr; - - if (write) { - if (read_only(addr)) { - 
UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", - __func__); - } else - *addr = *val; - } else - *val = *addr; - return 0; -} -EXPORT_SYMBOL(unw_access_pr); - - -/* Routines to manipulate the state stack. */ - -static inline void -push (struct unw_state_record *sr) -{ - struct unw_reg_state *rs; - - rs = alloc_reg_state(); - if (!rs) { - printk(KERN_ERR "unwind: cannot stack reg state!\n"); - return; - } - memcpy(rs, &sr->curr, sizeof(*rs)); - sr->curr.next = rs; -} - -static void -pop (struct unw_state_record *sr) -{ - struct unw_reg_state *rs = sr->curr.next; - - if (!rs) { - printk(KERN_ERR "unwind: stack underflow!\n"); - return; - } - memcpy(&sr->curr, rs, sizeof(*rs)); - free_reg_state(rs); -} - -/* Make a copy of the state stack. Non-recursive to avoid stack overflows. */ -static struct unw_reg_state * -dup_state_stack (struct unw_reg_state *rs) -{ - struct unw_reg_state *copy, *prev = NULL, *first = NULL; - - while (rs) { - copy = alloc_reg_state(); - if (!copy) { - printk(KERN_ERR "unwind.dup_state_stack: out of memory\n"); - return NULL; - } - memcpy(copy, rs, sizeof(*copy)); - if (first) - prev->next = copy; - else - first = copy; - rs = rs->next; - prev = copy; - } - return first; -} - -/* Free all stacked register states (but not RS itself). */ -static void -free_state_stack (struct unw_reg_state *rs) -{ - struct unw_reg_state *p, *next; - - for (p = rs->next; p != NULL; p = next) { - next = p->next; - free_reg_state(p); - } - rs->next = NULL; -} - -/* Unwind decoder routines */ - -static enum unw_register_index __attribute_const__ -decode_abreg (unsigned char abreg, int memory) -{ - switch (abreg) { - case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04); - case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22); - case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30); - case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41); - case 0x60: return UNW_REG_PR; - case 0x61: return UNW_REG_PSP; - case 0x62: return memory ? 
UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR; - case 0x63: return UNW_REG_RP; - case 0x64: return UNW_REG_BSP; - case 0x65: return UNW_REG_BSPSTORE; - case 0x66: return UNW_REG_RNAT; - case 0x67: return UNW_REG_UNAT; - case 0x68: return UNW_REG_FPSR; - case 0x69: return UNW_REG_PFS; - case 0x6a: return UNW_REG_LC; - default: - break; - } - UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __func__, abreg); - return UNW_REG_LC; -} - -static void -set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val) -{ - reg->val = val; - reg->where = where; - if (reg->when == UNW_WHEN_NEVER) - reg->when = when; -} - -static void -alloc_spill_area (unsigned long *offp, unsigned long regsize, - struct unw_reg_info *lo, struct unw_reg_info *hi) -{ - struct unw_reg_info *reg; - - for (reg = hi; reg >= lo; --reg) { - if (reg->where == UNW_WHERE_SPILL_HOME) { - reg->where = UNW_WHERE_PSPREL; - *offp -= regsize; - reg->val = *offp; - } - } -} - -static inline void -spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t) -{ - struct unw_reg_info *reg; - - for (reg = *regp; reg <= lim; ++reg) { - if (reg->where == UNW_WHERE_SPILL_HOME) { - reg->when = t; - *regp = reg + 1; - return; - } - } - UNW_DPRINT(0, "unwind.%s: excess spill!\n", __func__); -} - -static inline void -finish_prologue (struct unw_state_record *sr) -{ - struct unw_reg_info *reg; - unsigned long off; - int i; - - /* - * First, resolve implicit register save locations (see Section "11.4.2.3 Rules - * for Using Unwind Descriptors", rule 3): - */ - for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) { - reg = sr->curr.reg + unw.save_order[i]; - if (reg->where == UNW_WHERE_GR_SAVE) { - reg->where = UNW_WHERE_GR; - reg->val = sr->gr_save_loc++; - } - } - - /* - * Next, compute when the fp, general, and branch registers get - * saved. This must come before alloc_spill_area() because - * we need to know which registers are spilled to their home - * locations. 
- */ - if (sr->imask) { - unsigned char kind, mask = 0, *cp = sr->imask; - int t; - static const unsigned char limit[3] = { - UNW_REG_F31, UNW_REG_R7, UNW_REG_B5 - }; - struct unw_reg_info *(regs[3]); - - regs[0] = sr->curr.reg + UNW_REG_F2; - regs[1] = sr->curr.reg + UNW_REG_R4; - regs[2] = sr->curr.reg + UNW_REG_B1; - - for (t = 0; t < sr->region_len; ++t) { - if ((t & 3) == 0) - mask = *cp++; - kind = (mask >> 2*(3-(t & 3))) & 3; - if (kind > 0) - spill_next_when(®s[kind - 1], sr->curr.reg + limit[kind - 1], - sr->region_start + t); - } - } - /* - * Next, lay out the memory stack spill area: - */ - if (sr->any_spills) { - off = sr->spill_offset; - alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31); - alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5); - alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7); - } -} - -/* - * Region header descriptors. - */ - -static void -desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave, - struct unw_state_record *sr) -{ - int i, region_start; - - if (!(sr->in_body || sr->first_region)) - finish_prologue(sr); - sr->first_region = 0; - - /* check if we're done: */ - if (sr->when_target < sr->region_start + sr->region_len) { - sr->done = 1; - return; - } - - region_start = sr->region_start + sr->region_len; - - for (i = 0; i < sr->epilogue_count; ++i) - pop(sr); - sr->epilogue_count = 0; - sr->epilogue_start = UNW_WHEN_NEVER; - - sr->region_start = region_start; - sr->region_len = rlen; - sr->in_body = body; - - if (!body) { - push(sr); - - for (i = 0; i < 4; ++i) { - if (mask & 0x8) - set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR, - sr->region_start + sr->region_len - 1, grsave++); - mask <<= 1; - } - sr->gr_save_loc = grsave; - sr->any_spills = 0; - sr->imask = NULL; - sr->spill_offset = 0x10; /* default to psp+16 */ - } -} - -/* - * Prologue descriptors. 
- */ - -static inline void -desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr) -{ - if (abi == 3 && context == 'i') { - sr->flags |= UNW_FLAG_INTERRUPT_FRAME; - UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __func__); - } - else - UNW_DPRINT(0, "unwind%s: ignoring unwabi(abi=0x%x,context=0x%x)\n", - __func__, abi, context); -} - -static inline void -desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr) -{ - int i; - - for (i = 0; i < 5; ++i) { - if (brmask & 1) - set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR, - sr->region_start + sr->region_len - 1, gr++); - brmask >>= 1; - } -} - -static inline void -desc_br_mem (unsigned char brmask, struct unw_state_record *sr) -{ - int i; - - for (i = 0; i < 5; ++i) { - if (brmask & 1) { - set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME, - sr->region_start + sr->region_len - 1, 0); - sr->any_spills = 1; - } - brmask >>= 1; - } -} - -static inline void -desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr) -{ - int i; - - for (i = 0; i < 4; ++i) { - if ((grmask & 1) != 0) { - set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, - sr->region_start + sr->region_len - 1, 0); - sr->any_spills = 1; - } - grmask >>= 1; - } - for (i = 0; i < 20; ++i) { - if ((frmask & 1) != 0) { - int base = (i < 4) ? 
UNW_REG_F2 : UNW_REG_F16 - 4; - set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME, - sr->region_start + sr->region_len - 1, 0); - sr->any_spills = 1; - } - frmask >>= 1; - } -} - -static inline void -desc_fr_mem (unsigned char frmask, struct unw_state_record *sr) -{ - int i; - - for (i = 0; i < 4; ++i) { - if ((frmask & 1) != 0) { - set_reg(sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME, - sr->region_start + sr->region_len - 1, 0); - sr->any_spills = 1; - } - frmask >>= 1; - } -} - -static inline void -desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr) -{ - int i; - - for (i = 0; i < 4; ++i) { - if ((grmask & 1) != 0) - set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR, - sr->region_start + sr->region_len - 1, gr++); - grmask >>= 1; - } -} - -static inline void -desc_gr_mem (unsigned char grmask, struct unw_state_record *sr) -{ - int i; - - for (i = 0; i < 4; ++i) { - if ((grmask & 1) != 0) { - set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, - sr->region_start + sr->region_len - 1, 0); - sr->any_spills = 1; - } - grmask >>= 1; - } -} - -static inline void -desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr) -{ - set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE, - sr->region_start + min_t(int, t, sr->region_len - 1), 16*size); -} - -static inline void -desc_mem_stack_v (unw_word t, struct unw_state_record *sr) -{ - sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1); -} - -static inline void -desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr) -{ - set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst); -} - -static inline void -desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr) -{ - set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1, - 0x10 - 4*pspoff); -} - -static inline void -desc_reg_sprel (unsigned char reg, unw_word spoff, 
struct unw_state_record *sr) -{ - set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1, - 4*spoff); -} - -static inline void -desc_rp_br (unsigned char dst, struct unw_state_record *sr) -{ - sr->return_link_reg = dst; -} - -static inline void -desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr) -{ - struct unw_reg_info *reg = sr->curr.reg + regnum; - - if (reg->where == UNW_WHERE_NONE) - reg->where = UNW_WHERE_GR_SAVE; - reg->when = sr->region_start + min_t(int, t, sr->region_len - 1); -} - -static inline void -desc_spill_base (unw_word pspoff, struct unw_state_record *sr) -{ - sr->spill_offset = 0x10 - 4*pspoff; -} - -static inline unsigned char * -desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr) -{ - sr->imask = imaskp; - return imaskp + (2*sr->region_len + 7)/8; -} - -/* - * Body descriptors. - */ -static inline void -desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr) -{ - sr->epilogue_start = sr->region_start + sr->region_len - 1 - t; - sr->epilogue_count = ecount + 1; -} - -static inline void -desc_copy_state (unw_word label, struct unw_state_record *sr) -{ - struct unw_labeled_state *ls; - - for (ls = sr->labeled_states; ls; ls = ls->next) { - if (ls->label == label) { - free_state_stack(&sr->curr); - memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr)); - sr->curr.next = dup_state_stack(ls->saved_state.next); - return; - } - } - printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label); -} - -static inline void -desc_label_state (unw_word label, struct unw_state_record *sr) -{ - struct unw_labeled_state *ls; - - ls = alloc_labeled_state(); - if (!ls) { - printk(KERN_ERR "unwind.desc_label_state(): out of memory\n"); - return; - } - ls->label = label; - memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state)); - ls->saved_state.next = dup_state_stack(sr->curr.next); - - /* insert into list of labeled states: */ - ls->next = sr->labeled_states; - 
sr->labeled_states = ls; -} - -/* - * General descriptors. - */ - -static inline int -desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr) -{ - if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1)) - return 0; - if (qp > 0) { - if ((sr->pr_val & (1UL << qp)) == 0) - return 0; - sr->pr_mask |= (1UL << qp); - } - return 1; -} - -static inline void -desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr) -{ - struct unw_reg_info *r; - - if (!desc_is_active(qp, t, sr)) - return; - - r = sr->curr.reg + decode_abreg(abreg, 0); - r->where = UNW_WHERE_NONE; - r->when = UNW_WHEN_NEVER; - r->val = 0; -} - -static inline void -desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x, - unsigned char ytreg, struct unw_state_record *sr) -{ - enum unw_where where = UNW_WHERE_GR; - struct unw_reg_info *r; - - if (!desc_is_active(qp, t, sr)) - return; - - if (x) - where = UNW_WHERE_BR; - else if (ytreg & 0x80) - where = UNW_WHERE_FR; - - r = sr->curr.reg + decode_abreg(abreg, 0); - r->where = where; - r->when = sr->region_start + min_t(int, t, sr->region_len - 1); - r->val = (ytreg & 0x7f); -} - -static inline void -desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff, - struct unw_state_record *sr) -{ - struct unw_reg_info *r; - - if (!desc_is_active(qp, t, sr)) - return; - - r = sr->curr.reg + decode_abreg(abreg, 1); - r->where = UNW_WHERE_PSPREL; - r->when = sr->region_start + min_t(int, t, sr->region_len - 1); - r->val = 0x10 - 4*pspoff; -} - -static inline void -desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff, - struct unw_state_record *sr) -{ - struct unw_reg_info *r; - - if (!desc_is_active(qp, t, sr)) - return; - - r = sr->curr.reg + decode_abreg(abreg, 1); - r->where = UNW_WHERE_SPREL; - r->when = sr->region_start + min_t(int, t, sr->region_len - 1); - r->val = 4*spoff; -} - -#define 
UNW_DEC_BAD_CODE(code) printk(KERN_ERR "unwind: unknown code 0x%02x\n", \ - code); - -/* - * region headers: - */ -#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg) desc_prologue(0,r,m,gr,arg) -#define UNW_DEC_PROLOGUE(fmt,b,r,arg) desc_prologue(b,r,0,32,arg) -/* - * prologue descriptors: - */ -#define UNW_DEC_ABI(fmt,a,c,arg) desc_abi(a,c,arg) -#define UNW_DEC_BR_GR(fmt,b,g,arg) desc_br_gr(b,g,arg) -#define UNW_DEC_BR_MEM(fmt,b,arg) desc_br_mem(b,arg) -#define UNW_DEC_FRGR_MEM(fmt,g,f,arg) desc_frgr_mem(g,f,arg) -#define UNW_DEC_FR_MEM(fmt,f,arg) desc_fr_mem(f,arg) -#define UNW_DEC_GR_GR(fmt,m,g,arg) desc_gr_gr(m,g,arg) -#define UNW_DEC_GR_MEM(fmt,m,arg) desc_gr_mem(m,arg) -#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg) desc_mem_stack_f(t,s,arg) -#define UNW_DEC_MEM_STACK_V(fmt,t,arg) desc_mem_stack_v(t,arg) -#define UNW_DEC_REG_GR(fmt,r,d,arg) desc_reg_gr(r,d,arg) -#define UNW_DEC_REG_PSPREL(fmt,r,o,arg) desc_reg_psprel(r,o,arg) -#define UNW_DEC_REG_SPREL(fmt,r,o,arg) desc_reg_sprel(r,o,arg) -#define UNW_DEC_REG_WHEN(fmt,r,t,arg) desc_reg_when(r,t,arg) -#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg) -#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg) -#define UNW_DEC_PRIUNAT_GR(fmt,r,arg) desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg) -#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg) desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg) -#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg) desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg) -#define UNW_DEC_RP_BR(fmt,d,arg) desc_rp_br(d,arg) -#define UNW_DEC_SPILL_BASE(fmt,o,arg) desc_spill_base(o,arg) -#define UNW_DEC_SPILL_MASK(fmt,m,arg) (m = desc_spill_mask(m,arg)) -/* - * body descriptors: - */ -#define UNW_DEC_EPILOGUE(fmt,t,c,arg) desc_epilogue(t,c,arg) -#define UNW_DEC_COPY_STATE(fmt,l,arg) desc_copy_state(l,arg) -#define UNW_DEC_LABEL_STATE(fmt,l,arg) desc_label_state(l,arg) -/* - * general unwind descriptors: - */ -#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg) 
desc_spill_reg_p(p,t,a,x,y,arg) -#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg) desc_spill_reg_p(0,t,a,x,y,arg) -#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg) desc_spill_psprel_p(p,t,a,o,arg) -#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg) desc_spill_psprel_p(0,t,a,o,arg) -#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg) desc_spill_sprel_p(p,t,a,o,arg) -#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg) desc_spill_sprel_p(0,t,a,o,arg) -#define UNW_DEC_RESTORE_P(f,p,t,a,arg) desc_restore_p(p,t,a,arg) -#define UNW_DEC_RESTORE(f,t,a,arg) desc_restore_p(0,t,a,arg) - -#include "unwind_decoder.c" - - -/* Unwind scripts. */ - -static inline unw_hash_index_t -hash (unsigned long ip) -{ - /* magic number = ((sqrt(5)-1)/2)*2^64 */ - static const unsigned long hashmagic = 0x9e3779b97f4a7c16UL; - - return (ip >> 4) * hashmagic >> (64 - UNW_LOG_HASH_SIZE); -} - -static inline long -cache_match (struct unw_script *script, unsigned long ip, unsigned long pr) -{ - read_lock(&script->lock); - if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0) - /* keep the read lock... 
*/ - return 1; - read_unlock(&script->lock); - return 0; -} - -static inline struct unw_script * -script_lookup (struct unw_frame_info *info) -{ - struct unw_script *script = unw.cache + info->hint; - unsigned short index; - unsigned long ip, pr; - - if (UNW_DEBUG_ON(0)) - return NULL; /* Always regenerate scripts in debug mode */ - - STAT(++unw.stat.cache.lookups); - - ip = info->ip; - pr = info->pr; - - if (cache_match(script, ip, pr)) { - STAT(++unw.stat.cache.hinted_hits); - return script; - } - - index = unw.hash[hash(ip)]; - if (index >= UNW_CACHE_SIZE) - return NULL; - - script = unw.cache + index; - while (1) { - if (cache_match(script, ip, pr)) { - /* update hint; no locking required as single-word writes are atomic */ - STAT(++unw.stat.cache.normal_hits); - unw.cache[info->prev_script].hint = script - unw.cache; - return script; - } - if (script->coll_chain >= UNW_HASH_SIZE) - return NULL; - script = unw.cache + script->coll_chain; - STAT(++unw.stat.cache.collision_chain_traversals); - } -} - -/* - * On returning, a write lock for the SCRIPT is still being held. - */ -static inline struct unw_script * -script_new (unsigned long ip) -{ - struct unw_script *script, *prev, *tmp; - unw_hash_index_t index; - unsigned short head; - - STAT(++unw.stat.script.news); - - /* - * Can't (easily) use cmpxchg() here because of ABA problem - * that is intrinsic in cmpxchg()... - */ - head = unw.lru_head; - script = unw.cache + head; - unw.lru_head = script->lru_chain; - - /* - * We'd deadlock here if we interrupted a thread that is holding a read lock on - * script->lock. Thus, if the write_trylock() fails, we simply bail out. The - * alternative would be to disable interrupts whenever we hold a read-lock, but - * that seems silly. 
- */ - if (!write_trylock(&script->lock)) - return NULL; - - /* re-insert script at the tail of the LRU chain: */ - unw.cache[unw.lru_tail].lru_chain = head; - unw.lru_tail = head; - - /* remove the old script from the hash table (if it's there): */ - if (script->ip) { - index = hash(script->ip); - tmp = unw.cache + unw.hash[index]; - prev = NULL; - while (1) { - if (tmp == script) { - if (prev) - prev->coll_chain = tmp->coll_chain; - else - unw.hash[index] = tmp->coll_chain; - break; - } else - prev = tmp; - if (tmp->coll_chain >= UNW_CACHE_SIZE) - /* old script wasn't in the hash-table */ - break; - tmp = unw.cache + tmp->coll_chain; - } - } - - /* enter new script in the hash table */ - index = hash(ip); - script->coll_chain = unw.hash[index]; - unw.hash[index] = script - unw.cache; - - script->ip = ip; /* set new IP while we're holding the locks */ - - STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions); - - script->flags = 0; - script->hint = 0; - script->count = 0; - return script; -} - -static void -script_finalize (struct unw_script *script, struct unw_state_record *sr) -{ - script->pr_mask = sr->pr_mask; - script->pr_val = sr->pr_val; - /* - * We could down-grade our write-lock on script->lock here but - * the rwlock API doesn't offer atomic lock downgrading, so - * we'll just keep the write-lock and release it later when - * we're done using the script. 
- */ -} - -static inline void -script_emit (struct unw_script *script, struct unw_insn insn) -{ - if (script->count >= UNW_MAX_SCRIPT_LEN) { - UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n", - __func__, UNW_MAX_SCRIPT_LEN); - return; - } - script->insn[script->count++] = insn; -} - -static inline void -emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script) -{ - struct unw_reg_info *r = sr->curr.reg + i; - enum unw_insn_opcode opc; - struct unw_insn insn; - unsigned long val = 0; - - switch (r->where) { - case UNW_WHERE_GR: - if (r->val >= 32) { - /* register got spilled to a stacked register */ - opc = UNW_INSN_SETNAT_TYPE; - val = UNW_NAT_REGSTK; - } else - /* register got spilled to a scratch register */ - opc = UNW_INSN_SETNAT_MEMSTK; - break; - - case UNW_WHERE_FR: - opc = UNW_INSN_SETNAT_TYPE; - val = UNW_NAT_VAL; - break; - - case UNW_WHERE_BR: - opc = UNW_INSN_SETNAT_TYPE; - val = UNW_NAT_NONE; - break; - - case UNW_WHERE_PSPREL: - case UNW_WHERE_SPREL: - opc = UNW_INSN_SETNAT_MEMSTK; - break; - - default: - UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n", - __func__, r->where); - return; - } - insn.opc = opc; - insn.dst = unw.preg_index[i]; - insn.val = val; - script_emit(script, insn); -} - -static void -compile_reg (struct unw_state_record *sr, int i, struct unw_script *script) -{ - struct unw_reg_info *r = sr->curr.reg + i; - enum unw_insn_opcode opc; - unsigned long val, rval; - struct unw_insn insn; - long need_nat_info; - - if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target) - return; - - opc = UNW_INSN_MOVE; - val = rval = r->val; - need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7); - - switch (r->where) { - case UNW_WHERE_GR: - if (rval >= 32) { - opc = UNW_INSN_MOVE_STACKED; - val = rval - 32; - } else if (rval >= 4 && rval <= 7) { - if (need_nat_info) { - opc = UNW_INSN_MOVE2; - need_nat_info = 0; - } - val = unw.preg_index[UNW_REG_R4 + (rval - 4)]; - } 
else if (rval == 0) { - opc = UNW_INSN_MOVE_CONST; - val = 0; - } else { - /* register got spilled to a scratch register */ - opc = UNW_INSN_MOVE_SCRATCH; - val = pt_regs_off(rval); - } - break; - - case UNW_WHERE_FR: - if (rval <= 5) - val = unw.preg_index[UNW_REG_F2 + (rval - 2)]; - else if (rval >= 16 && rval <= 31) - val = unw.preg_index[UNW_REG_F16 + (rval - 16)]; - else { - opc = UNW_INSN_MOVE_SCRATCH; - if (rval <= 11) - val = offsetof(struct pt_regs, f6) + 16*(rval - 6); - else - UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n", - __func__, rval); - } - break; - - case UNW_WHERE_BR: - if (rval >= 1 && rval <= 5) - val = unw.preg_index[UNW_REG_B1 + (rval - 1)]; - else { - opc = UNW_INSN_MOVE_SCRATCH; - if (rval == 0) - val = offsetof(struct pt_regs, b0); - else if (rval == 6) - val = offsetof(struct pt_regs, b6); - else - val = offsetof(struct pt_regs, b7); - } - break; - - case UNW_WHERE_SPREL: - opc = UNW_INSN_ADD_SP; - break; - - case UNW_WHERE_PSPREL: - opc = UNW_INSN_ADD_PSP; - break; - - default: - UNW_DPRINT(0, "unwind%s: register %u has unexpected `where' value of %u\n", - __func__, i, r->where); - break; - } - insn.opc = opc; - insn.dst = unw.preg_index[i]; - insn.val = val; - script_emit(script, insn); - if (need_nat_info) - emit_nat_info(sr, i, script); - - if (i == UNW_REG_PSP) { - /* - * info->psp must contain the _value_ of the previous - * sp, not it's save location. 
We get this by - * dereferencing the value we just stored in - * info->psp: - */ - insn.opc = UNW_INSN_LOAD; - insn.dst = insn.val = unw.preg_index[UNW_REG_PSP]; - script_emit(script, insn); - } -} - -static inline const struct unw_table_entry * -lookup (struct unw_table *table, unsigned long rel_ip) -{ - const struct unw_table_entry *e = NULL; - unsigned long lo, hi, mid; - - /* do a binary search for right entry: */ - for (lo = 0, hi = table->length; lo < hi; ) { - mid = (lo + hi) / 2; - e = &table->array[mid]; - if (rel_ip < e->start_offset) - hi = mid; - else if (rel_ip >= e->end_offset) - lo = mid + 1; - else - break; - } - if (rel_ip < e->start_offset || rel_ip >= e->end_offset) - return NULL; - return e; -} - -/* - * Build an unwind script that unwinds from state OLD_STATE to the - * entrypoint of the function that called OLD_STATE. - */ -static inline struct unw_script * -build_script (struct unw_frame_info *info) -{ - const struct unw_table_entry *e = NULL; - struct unw_script *script = NULL; - struct unw_labeled_state *ls, *next; - unsigned long ip = info->ip; - struct unw_state_record sr; - struct unw_table *table, *prev; - struct unw_reg_info *r; - struct unw_insn insn; - u8 *dp, *desc_end; - u64 hdr; - int i; - STAT(unsigned long start, parse_start;) - - STAT(++unw.stat.script.builds; start = ia64_get_itc()); - - /* build state record */ - memset(&sr, 0, sizeof(sr)); - for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) - r->when = UNW_WHEN_NEVER; - sr.pr_val = info->pr; - - UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __func__, ip); - script = script_new(ip); - if (!script) { - UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __func__); - STAT(unw.stat.script.build_time += ia64_get_itc() - start); - return NULL; - } - unw.cache[info->prev_script].hint = script - unw.cache; - - /* search the kernels and the modules' unwind tables for IP: */ - - STAT(parse_start = ia64_get_itc()); - - prev = NULL; - for (table = unw.tables; table; table = 
table->next) { - if (ip >= table->start && ip < table->end) { - /* - * Leave the kernel unwind table at the very front, - * lest moving it breaks some assumption elsewhere. - * Otherwise, move the matching table to the second - * position in the list so that traversals can benefit - * from commonality in backtrace paths. - */ - if (prev && prev != unw.tables) { - /* unw is safe - we're already spinlocked */ - prev->next = table->next; - table->next = unw.tables->next; - unw.tables->next = table; - } - e = lookup(table, ip - table->segment_base); - break; - } - prev = table; - } - if (!e) { - /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */ - UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n", - __func__, ip, unw.cache[info->prev_script].ip); - sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; - sr.curr.reg[UNW_REG_RP].when = -1; - sr.curr.reg[UNW_REG_RP].val = 0; - compile_reg(&sr, UNW_REG_RP, script); - script_finalize(script, &sr); - STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); - STAT(unw.stat.script.build_time += ia64_get_itc() - start); - return script; - } - - sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16 - + (ip & 0xfUL)); - hdr = *(u64 *) (table->segment_base + e->info_offset); - dp = (u8 *) (table->segment_base + e->info_offset + 8); - desc_end = dp + 8*UNW_LENGTH(hdr); - - while (!sr.done && dp < desc_end) - dp = unw_decode(dp, sr.in_body, &sr); - - if (sr.when_target > sr.epilogue_start) { - /* - * sp has been restored and all values on the memory stack below - * psp also have been restored. 
- */ - sr.curr.reg[UNW_REG_PSP].val = 0; - sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE; - sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER; - for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) - if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10) - || r->where == UNW_WHERE_SPREL) - { - r->val = 0; - r->where = UNW_WHERE_NONE; - r->when = UNW_WHEN_NEVER; - } - } - - script->flags = sr.flags; - - /* - * If RP did't get saved, generate entry for the return link - * register. - */ - if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) { - sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; - sr.curr.reg[UNW_REG_RP].when = -1; - sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg; - UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n", - __func__, ip, sr.curr.reg[UNW_REG_RP].where, - sr.curr.reg[UNW_REG_RP].val); - } - -#ifdef UNW_DEBUG - UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n", - __func__, table->segment_base + e->start_offset, sr.when_target); - for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) { - if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) { - UNW_DPRINT(1, " %s <- ", unw.preg_name[r - sr.curr.reg]); - switch (r->where) { - case UNW_WHERE_GR: UNW_DPRINT(1, "r%lu", r->val); break; - case UNW_WHERE_FR: UNW_DPRINT(1, "f%lu", r->val); break; - case UNW_WHERE_BR: UNW_DPRINT(1, "b%lu", r->val); break; - case UNW_WHERE_SPREL: UNW_DPRINT(1, "[sp+0x%lx]", r->val); break; - case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break; - case UNW_WHERE_NONE: - UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val); - break; - - default: - UNW_DPRINT(1, "BADWHERE(%d)", r->where); - break; - } - UNW_DPRINT(1, "\t\t%d\n", r->when); - } - } -#endif - - STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); - - /* translate state record into unwinder instructions: */ - - /* - * First, set psp if we're dealing with a fixed-size frame; - * subsequent instructions may depend on 
this value. - */ - if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when - && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE) - && sr.curr.reg[UNW_REG_PSP].val != 0) { - /* new psp is sp plus frame size */ - insn.opc = UNW_INSN_ADD; - insn.dst = offsetof(struct unw_frame_info, psp)/8; - insn.val = sr.curr.reg[UNW_REG_PSP].val; /* frame size */ - script_emit(script, insn); - } - - /* determine where the primary UNaT is: */ - if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) - i = UNW_REG_PRI_UNAT_MEM; - else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when) - i = UNW_REG_PRI_UNAT_GR; - else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) - i = UNW_REG_PRI_UNAT_MEM; - else - i = UNW_REG_PRI_UNAT_GR; - - compile_reg(&sr, i, script); - - for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i) - compile_reg(&sr, i, script); - - /* free labeled register states & stack: */ - - STAT(parse_start = ia64_get_itc()); - for (ls = sr.labeled_states; ls; ls = next) { - next = ls->next; - free_state_stack(&ls->saved_state); - free_labeled_state(ls); - } - free_state_stack(&sr.curr); - STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); - - script_finalize(script, &sr); - STAT(unw.stat.script.build_time += ia64_get_itc() - start); - return script; -} - -/* - * Apply the unwinding actions represented by OPS and update SR to - * reflect the state that existed upon entry to the function that this - * unwinder represents. 
- */ -static inline void -run_script (struct unw_script *script, struct unw_frame_info *state) -{ - struct unw_insn *ip, *limit, next_insn; - unsigned long opc, dst, val, off; - unsigned long *s = (unsigned long *) state; - STAT(unsigned long start;) - - STAT(++unw.stat.script.runs; start = ia64_get_itc()); - state->flags = script->flags; - ip = script->insn; - limit = script->insn + script->count; - next_insn = *ip; - - while (ip++ < limit) { - opc = next_insn.opc; - dst = next_insn.dst; - val = next_insn.val; - next_insn = *ip; - - redo: - switch (opc) { - case UNW_INSN_ADD: - s[dst] += val; - break; - - case UNW_INSN_MOVE2: - if (!s[val]) - goto lazy_init; - s[dst+1] = s[val+1]; - s[dst] = s[val]; - break; - - case UNW_INSN_MOVE: - if (!s[val]) - goto lazy_init; - s[dst] = s[val]; - break; - - case UNW_INSN_MOVE_SCRATCH: - if (state->pt) { - s[dst] = (unsigned long) get_scratch_regs(state) + val; - } else { - s[dst] = 0; - UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n", - __func__, dst, val); - } - break; - - case UNW_INSN_MOVE_CONST: - if (val == 0) - s[dst] = (unsigned long) &unw.r0; - else { - s[dst] = 0; - UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n", - __func__, val); - } - break; - - - case UNW_INSN_MOVE_STACKED: - s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp, - val); - break; - - case UNW_INSN_ADD_PSP: - s[dst] = state->psp + val; - break; - - case UNW_INSN_ADD_SP: - s[dst] = state->sp + val; - break; - - case UNW_INSN_SETNAT_MEMSTK: - if (!state->pri_unat_loc) - state->pri_unat_loc = &state->sw->caller_unat; - /* register off. 
is a multiple of 8, so the least 3 bits (type) are 0 */ - s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK; - break; - - case UNW_INSN_SETNAT_TYPE: - s[dst+1] = val; - break; - - case UNW_INSN_LOAD: -#ifdef UNW_DEBUG - if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0 - || s[val] < TASK_SIZE) - { - UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n", - __func__, s[val]); - break; - } -#endif - s[dst] = *(unsigned long *) s[val]; - break; - } - } - STAT(unw.stat.script.run_time += ia64_get_itc() - start); - return; - - lazy_init: - off = unw.sw_off[val]; - s[val] = (unsigned long) state->sw + off; - if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7)) - /* - * We're initializing a general register: init NaT info, too. Note that - * the offset is a multiple of 8 which gives us the 3 bits needed for - * the type field. - */ - s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK; - goto redo; -} - -static int -find_save_locs (struct unw_frame_info *info) -{ - int have_write_lock = 0; - struct unw_script *scr; - unsigned long flags = 0; - - if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) { - /* don't let obviously bad addresses pollute the cache */ - /* FIXME: should really be level 0 but it occurs too often. 
KAO */ - UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __func__, info->ip); - info->rp_loc = NULL; - return -1; - } - - scr = script_lookup(info); - if (!scr) { - spin_lock_irqsave(&unw.lock, flags); - scr = build_script(info); - if (!scr) { - spin_unlock_irqrestore(&unw.lock, flags); - UNW_DPRINT(0, - "unwind.%s: failed to locate/build unwind script for ip %lx\n", - __func__, info->ip); - return -1; - } - have_write_lock = 1; - } - info->hint = scr->hint; - info->prev_script = scr - unw.cache; - - run_script(scr, info); - - if (have_write_lock) { - write_unlock(&scr->lock); - spin_unlock_irqrestore(&unw.lock, flags); - } else - read_unlock(&scr->lock); - return 0; -} - -static int -unw_valid(const struct unw_frame_info *info, unsigned long* p) -{ - unsigned long loc = (unsigned long)p; - return (loc >= info->regstk.limit && loc < info->regstk.top) || - (loc >= info->memstk.top && loc < info->memstk.limit); -} - -int -unw_unwind (struct unw_frame_info *info) -{ - unsigned long prev_ip, prev_sp, prev_bsp; - unsigned long ip, pr, num_regs; - STAT(unsigned long start, flags;) - int retval; - - STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc()); - - prev_ip = info->ip; - prev_sp = info->sp; - prev_bsp = info->bsp; - - /* validate the return IP pointer */ - if (!unw_valid(info, info->rp_loc)) { - /* FIXME: should really be level 0 but it occurs too often. 
KAO */ - UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n", - __func__, info->ip); - STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); - return -1; - } - /* restore the ip */ - ip = info->ip = *info->rp_loc; - if (ip < GATE_ADDR) { - UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __func__, ip); - STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); - return -1; - } - - /* validate the previous stack frame pointer */ - if (!unw_valid(info, info->pfs_loc)) { - UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __func__); - STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); - return -1; - } - /* restore the cfm: */ - info->cfm_loc = info->pfs_loc; - - /* restore the bsp: */ - pr = info->pr; - num_regs = 0; - if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) { - info->pt = info->sp + 16; - if ((pr & (1UL << PRED_NON_SYSCALL)) != 0) - num_regs = *info->cfm_loc & 0x7f; /* size of frame */ - info->pfs_loc = - (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs)); - UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __func__, info->pt); - } else - num_regs = (*info->cfm_loc >> 7) & 0x7f; /* size of locals */ - info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs); - if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) { - UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n", - __func__, info->bsp, info->regstk.limit, info->regstk.top); - STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); - return -1; - } - - /* restore the sp: */ - info->sp = info->psp; - if (info->sp < info->memstk.top || info->sp > info->memstk.limit) { - UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n", - __func__, info->sp, info->memstk.top, info->memstk.limit); - STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); - 
return -1; - } - - if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) { - UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n", - __func__, ip); - STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); - return -1; - } - - /* as we unwind, the saved ar.unat becomes the primary unat: */ - info->pri_unat_loc = info->unat_loc; - - /* finally, restore the predicates: */ - unw_get_pr(info, &info->pr); - - retval = find_save_locs(info); - STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); - return retval; -} -EXPORT_SYMBOL(unw_unwind); - -int -unw_unwind_to_user (struct unw_frame_info *info) -{ - unsigned long ip, sp, pr = info->pr; - - do { - unw_get_sp(info, &sp); - if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp) - < IA64_PT_REGS_SIZE) { - UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n", - __func__); - break; - } - if (unw_is_intr_frame(info) && - (pr & (1UL << PRED_USER_STACK))) - return 0; - if (unw_get_pr (info, &pr) < 0) { - unw_get_rp(info, &ip); - UNW_DPRINT(0, "unwind.%s: failed to read " - "predicate register (ip=0x%lx)\n", - __func__, ip); - return -1; - } - } while (unw_unwind(info) >= 0); - unw_get_ip(info, &ip); - UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", - __func__, ip); - return -1; -} -EXPORT_SYMBOL(unw_unwind_to_user); - -static void -init_frame_info (struct unw_frame_info *info, struct task_struct *t, - struct switch_stack *sw, unsigned long stktop) -{ - unsigned long rbslimit, rbstop, stklimit; - STAT(unsigned long start, flags;) - - STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc()); - - /* - * Subtle stuff here: we _could_ unwind through the switch_stack frame but we - * don't want to do that because it would be slow as each preserved register would - * have to be processed. 
Instead, what we do here is zero out the frame info and - * start the unwind process at the function that created the switch_stack frame. - * When a preserved value in switch_stack needs to be accessed, run_script() will - * initialize the appropriate pointer on demand. - */ - memset(info, 0, sizeof(*info)); - - rbslimit = (unsigned long) t + IA64_RBS_OFFSET; - stklimit = (unsigned long) t + IA64_STK_OFFSET; - - rbstop = sw->ar_bspstore; - if (rbstop > stklimit || rbstop < rbslimit) - rbstop = rbslimit; - - if (stktop <= rbstop) - stktop = rbstop; - if (stktop > stklimit) - stktop = stklimit; - - info->regstk.limit = rbslimit; - info->regstk.top = rbstop; - info->memstk.limit = stklimit; - info->memstk.top = stktop; - info->task = t; - info->sw = sw; - info->sp = info->psp = stktop; - info->pr = sw->pr; - UNW_DPRINT(3, "unwind.%s:\n" - " task 0x%lx\n" - " rbs = [0x%lx-0x%lx)\n" - " stk = [0x%lx-0x%lx)\n" - " pr 0x%lx\n" - " sw 0x%lx\n" - " sp 0x%lx\n", - __func__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit, - info->pr, (unsigned long) info->sw, info->sp); - STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags)); -} - -void -unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw) -{ - unsigned long sol; - - init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16); - info->cfm_loc = &sw->ar_pfs; - sol = (*info->cfm_loc >> 7) & 0x7f; - info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol); - info->ip = sw->b0; - UNW_DPRINT(3, "unwind.%s:\n" - " bsp 0x%lx\n" - " sol 0x%lx\n" - " ip 0x%lx\n", - __func__, info->bsp, sol, info->ip); - find_save_locs(info); -} - -EXPORT_SYMBOL(unw_init_frame_info); - -void -unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t) -{ - struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16); - - UNW_DPRINT(1, "unwind.%s\n", __func__); - unw_init_frame_info(info, t, sw); -} 
-EXPORT_SYMBOL(unw_init_from_blocked_task); - -static void -init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base, - unsigned long gp, const void *table_start, const void *table_end) -{ - const struct unw_table_entry *start = table_start, *end = table_end; - - table->name = name; - table->segment_base = segment_base; - table->gp = gp; - table->start = segment_base + start[0].start_offset; - table->end = segment_base + end[-1].end_offset; - table->array = start; - table->length = end - start; -} - -void * -unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp, - const void *table_start, const void *table_end) -{ - const struct unw_table_entry *start = table_start, *end = table_end; - struct unw_table *table; - unsigned long flags; - - if (end - start <= 0) { - UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n", - __func__); - return NULL; - } - - table = kmalloc(sizeof(*table), GFP_USER); - if (!table) - return NULL; - - init_unwind_table(table, name, segment_base, gp, table_start, table_end); - - spin_lock_irqsave(&unw.lock, flags); - { - /* keep kernel unwind table at the front (it's searched most commonly): */ - table->next = unw.tables->next; - unw.tables->next = table; - } - spin_unlock_irqrestore(&unw.lock, flags); - - return table; -} - -void -unw_remove_unwind_table (void *handle) -{ - struct unw_table *table, *prev; - struct unw_script *tmp; - unsigned long flags; - long index; - - if (!handle) { - UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n", - __func__); - return; - } - - table = handle; - if (table == &unw.kernel_table) { - UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a " - "no-can-do!\n", __func__); - return; - } - - spin_lock_irqsave(&unw.lock, flags); - { - /* first, delete the table: */ - - for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next) - if (prev->next == table) - break; - if 
(!prev) { - UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n", - __func__, (void *) table); - spin_unlock_irqrestore(&unw.lock, flags); - return; - } - prev->next = table->next; - } - spin_unlock_irqrestore(&unw.lock, flags); - - /* next, remove hash table entries for this table */ - - for (index = 0; index < UNW_HASH_SIZE; ++index) { - tmp = unw.cache + unw.hash[index]; - if (unw.hash[index] >= UNW_CACHE_SIZE - || tmp->ip < table->start || tmp->ip >= table->end) - continue; - - write_lock(&tmp->lock); - { - if (tmp->ip >= table->start && tmp->ip < table->end) { - unw.hash[index] = tmp->coll_chain; - tmp->ip = 0; - } - } - write_unlock(&tmp->lock); - } - - kfree(table); -} - -static int __init -create_gate_table (void) -{ - const struct unw_table_entry *entry, *start, *end; - unsigned long *lp, segbase = GATE_ADDR; - size_t info_size, size; - char *info; - Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); - int i; - - for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr) - if (phdr->p_type == PT_IA_64_UNWIND) { - punw = phdr; - break; - } - - if (!punw) { - printk("%s: failed to find gate DSO's unwind table!\n", __func__); - return 0; - } - - start = (const struct unw_table_entry *) punw->p_vaddr; - end = (struct unw_table_entry *) ((char *) start + punw->p_memsz); - size = 0; - - unw_add_unwind_table("linux-gate.so", segbase, 0, start, end); - - for (entry = start; entry < end; ++entry) - size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); - size += 8; /* reserve space for "end of table" marker */ - - unw.gate_table = kmalloc(size, GFP_KERNEL); - if (!unw.gate_table) { - unw.gate_table_size = 0; - printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __func__); - return 0; - } - unw.gate_table_size = size; - - lp = unw.gate_table; - info = (char *) unw.gate_table + size; - - for (entry = start; entry < end; ++entry, lp += 3) { - info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + 
entry->info_offset)); - info -= info_size; - memcpy(info, (char *) segbase + entry->info_offset, info_size); - - lp[0] = segbase + entry->start_offset; /* start */ - lp[1] = segbase + entry->end_offset; /* end */ - lp[2] = info - (char *) unw.gate_table; /* info */ - } - *lp = 0; /* end-of-table marker */ - return 0; -} - -__initcall(create_gate_table); - -void __init -unw_init (void) -{ - extern char __gp[]; - extern void unw_hash_index_t_is_too_narrow (void); - long i, off; - - if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE) - unw_hash_index_t_is_too_narrow(); - - unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT); - unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE); - unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS); - unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0); - unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT); - unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR); - unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC); - unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR); - for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8) - unw.sw_off[unw.preg_index[i]] = off; - for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8) - unw.sw_off[unw.preg_index[i]] = off; - for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16) - unw.sw_off[unw.preg_index[i]] = off; - for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16) - unw.sw_off[unw.preg_index[i]] = off; - - for (i = 0; i < UNW_CACHE_SIZE; ++i) { - if (i > 0) - unw.cache[i].lru_chain = (i - 1); - unw.cache[i].coll_chain = -1; - rwlock_init(&unw.cache[i].lock); - } - unw.lru_head = UNW_CACHE_SIZE - 1; - unw.lru_tail = 0; - - init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) __gp, - __start_unwind, __end_unwind); -} - -/* - * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED - * - * This system call has been deprecated. 
The new and improved way to get - * at the kernel's unwind info is via the gate DSO. The address of the - * ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR. - * - * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED - * - * This system call copies the unwind data into the buffer pointed to by BUF and returns - * the size of the unwind data. If BUF_SIZE is smaller than the size of the unwind data - * or if BUF is NULL, nothing is copied, but the system call still returns the size of the - * unwind data. - * - * The first portion of the unwind data contains an unwind table and rest contains the - * associated unwind info (in no particular order). The unwind table consists of a table - * of entries of the form: - * - * u64 start; (64-bit address of start of function) - * u64 end; (64-bit address of start of function) - * u64 info; (BUF-relative offset to unwind info) - * - * The end of the unwind table is indicated by an entry with a START address of zero. - * - * Please see the IA-64 Software Conventions and Runtime Architecture manual for details - * on the format of the unwind info. - * - * ERRORS - * EFAULT BUF points outside your accessible address space. - */ -asmlinkage long -sys_getunwind (void __user *buf, size_t buf_size) -{ - if (buf && buf_size >= unw.gate_table_size) - if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0) - return -EFAULT; - return unw.gate_table_size; -} diff --git a/arch/ia64/kernel/unwind_decoder.c b/arch/ia64/kernel/unwind_decoder.c deleted file mode 100644 index 83f54f7929b5..000000000000 --- a/arch/ia64/kernel/unwind_decoder.c +++ /dev/null @@ -1,460 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2000 Hewlett-Packard Co - * Copyright (C) 2000 David Mosberger-Tang - * - * Generic IA-64 unwind info decoder. - * - * This file is used both by the Linux kernel and objdump. Please keep - * the two copies of this file in sync. 
- * - * You need to customize the decoder by defining the following - * macros/constants before including this file: - * - * Types: - * unw_word Unsigned integer type with at least 64 bits - * - * Register names: - * UNW_REG_BSP - * UNW_REG_BSPSTORE - * UNW_REG_FPSR - * UNW_REG_LC - * UNW_REG_PFS - * UNW_REG_PR - * UNW_REG_RNAT - * UNW_REG_PSP - * UNW_REG_RP - * UNW_REG_UNAT - * - * Decoder action macros: - * UNW_DEC_BAD_CODE(code) - * UNW_DEC_ABI(fmt,abi,context,arg) - * UNW_DEC_BR_GR(fmt,brmask,gr,arg) - * UNW_DEC_BR_MEM(fmt,brmask,arg) - * UNW_DEC_COPY_STATE(fmt,label,arg) - * UNW_DEC_EPILOGUE(fmt,t,ecount,arg) - * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg) - * UNW_DEC_FR_MEM(fmt,frmask,arg) - * UNW_DEC_GR_GR(fmt,grmask,gr,arg) - * UNW_DEC_GR_MEM(fmt,grmask,arg) - * UNW_DEC_LABEL_STATE(fmt,label,arg) - * UNW_DEC_MEM_STACK_F(fmt,t,size,arg) - * UNW_DEC_MEM_STACK_V(fmt,t,arg) - * UNW_DEC_PRIUNAT_GR(fmt,r,arg) - * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) - * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) - * UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg) - * UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg) - * UNW_DEC_PROLOGUE(fmt,body,rlen,arg) - * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg) - * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg) - * UNW_DEC_REG_REG(fmt,src,dst,arg) - * UNW_DEC_REG_SPREL(fmt,reg,spoff,arg) - * UNW_DEC_REG_WHEN(fmt,reg,t,arg) - * UNW_DEC_RESTORE(fmt,t,abreg,arg) - * UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg) - * UNW_DEC_SPILL_BASE(fmt,pspoff,arg) - * UNW_DEC_SPILL_MASK(fmt,imaskp,arg) - * UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg) - * UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg) - * UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg) - * UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg) - * UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg) - * UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg) - */ - -static unw_word -unw_decode_uleb128 (unsigned char **dpp) -{ - unsigned shift = 0; - unw_word byte, result = 0; - unsigned char *bp = *dpp; - - while (1) - { - byte = *bp++; - result |= (byte 
& 0x7f) << shift; - if ((byte & 0x80) == 0) - break; - shift += 7; - } - *dpp = bp; - return result; -} - -static unsigned char * -unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg) -{ - unsigned char byte1, abreg; - unw_word t, off; - - byte1 = *dp++; - t = unw_decode_uleb128 (&dp); - off = unw_decode_uleb128 (&dp); - abreg = (byte1 & 0x7f); - if (byte1 & 0x80) - UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg); - else - UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg); - return dp; -} - -static unsigned char * -unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg) -{ - unsigned char byte1, byte2, abreg, x, ytreg; - unw_word t; - - byte1 = *dp++; byte2 = *dp++; - t = unw_decode_uleb128 (&dp); - abreg = (byte1 & 0x7f); - ytreg = byte2; - x = (byte1 >> 7) & 1; - if ((byte1 & 0x80) == 0 && ytreg == 0) - UNW_DEC_RESTORE(X2, t, abreg, arg); - else - UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg); - return dp; -} - -static unsigned char * -unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg) -{ - unsigned char byte1, byte2, abreg, qp; - unw_word t, off; - - byte1 = *dp++; byte2 = *dp++; - t = unw_decode_uleb128 (&dp); - off = unw_decode_uleb128 (&dp); - - qp = (byte1 & 0x3f); - abreg = (byte2 & 0x7f); - - if (byte1 & 0x80) - UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg); - else - UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg); - return dp; -} - -static unsigned char * -unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg) -{ - unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg; - unw_word t; - - byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; - t = unw_decode_uleb128 (&dp); - - qp = (byte1 & 0x3f); - abreg = (byte2 & 0x7f); - x = (byte2 >> 7) & 1; - ytreg = byte3; - - if ((byte2 & 0x80) == 0 && byte3 == 0) - UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg); - else - UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg); - return dp; -} - -static unsigned char * -unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg) -{ - 
int body = (code & 0x20) != 0; - unw_word rlen; - - rlen = (code & 0x1f); - UNW_DEC_PROLOGUE(R1, body, rlen, arg); - return dp; -} - -static unsigned char * -unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg) -{ - unsigned char byte1, mask, grsave; - unw_word rlen; - - byte1 = *dp++; - - mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); - grsave = (byte1 & 0x7f); - rlen = unw_decode_uleb128 (&dp); - UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg); - return dp; -} - -static unsigned char * -unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg) -{ - unw_word rlen; - - rlen = unw_decode_uleb128 (&dp); - UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg); - return dp; -} - -static unsigned char * -unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg) -{ - unsigned char brmask = (code & 0x1f); - - UNW_DEC_BR_MEM(P1, brmask, arg); - return dp; -} - -static unsigned char * -unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg) -{ - if ((code & 0x10) == 0) - { - unsigned char byte1 = *dp++; - - UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1), - (byte1 & 0x7f), arg); - } - else if ((code & 0x08) == 0) - { - unsigned char byte1 = *dp++, r, dst; - - r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); - dst = (byte1 & 0x7f); - switch (r) - { - case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break; - case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break; - case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break; - case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break; - case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break; - case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break; - case 6: UNW_DEC_RP_BR(P3, dst, arg); break; - case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break; - case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break; - case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break; - case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break; - case 11: UNW_DEC_PRIUNAT_GR(P3, dst, 
arg); break; - default: UNW_DEC_BAD_CODE(r); break; - } - } - else if ((code & 0x7) == 0) - UNW_DEC_SPILL_MASK(P4, dp, arg); - else if ((code & 0x7) == 1) - { - unw_word grmask, frmask, byte1, byte2, byte3; - - byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; - grmask = ((byte1 >> 4) & 0xf); - frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3; - UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg); - } - else - UNW_DEC_BAD_CODE(code); - return dp; -} - -static unsigned char * -unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg) -{ - int gregs = (code & 0x10) != 0; - unsigned char mask = (code & 0x0f); - - if (gregs) - UNW_DEC_GR_MEM(P6, mask, arg); - else - UNW_DEC_FR_MEM(P6, mask, arg); - return dp; -} - -static unsigned char * -unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg) -{ - unsigned char r, byte1, byte2; - unw_word t, size; - - if ((code & 0x10) == 0) - { - r = (code & 0xf); - t = unw_decode_uleb128 (&dp); - switch (r) - { - case 0: - size = unw_decode_uleb128 (&dp); - UNW_DEC_MEM_STACK_F(P7, t, size, arg); - break; - - case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break; - case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break; - case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break; - case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break; - case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break; - case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break; - case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break; - case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break; - case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break; - case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break; - case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break; - case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break; - case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break; - case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break; - case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break; - default: UNW_DEC_BAD_CODE(r); break; 
- } - } - else - { - switch (code & 0xf) - { - case 0x0: /* p8 */ - { - r = *dp++; - t = unw_decode_uleb128 (&dp); - switch (r) - { - case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break; - case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break; - case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break; - case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break; - case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break; - case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break; - case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break; - case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break; - case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break; - case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break; - case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break; - case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break; - case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break; - case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break; - case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break; - case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break; - case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break; - case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break; - case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break; - default: UNW_DEC_BAD_CODE(r); break; - } - } - break; - - case 0x1: - byte1 = *dp++; byte2 = *dp++; - UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg); - break; - - case 0xf: /* p10 */ - byte1 = *dp++; byte2 = *dp++; - UNW_DEC_ABI(P10, byte1, byte2, arg); - break; - - case 0x9: - return unw_decode_x1 (dp, code, arg); - - case 0xa: - return unw_decode_x2 (dp, code, arg); - - case 0xb: - return unw_decode_x3 (dp, code, arg); - - case 0xc: - return unw_decode_x4 (dp, code, arg); - - default: - UNW_DEC_BAD_CODE(code); - break; - } - } - return dp; -} - -static unsigned char * -unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg) -{ - unw_word label = (code & 0x1f); - - if ((code & 0x20) 
!= 0) - UNW_DEC_COPY_STATE(B1, label, arg); - else - UNW_DEC_LABEL_STATE(B1, label, arg); - return dp; -} - -static unsigned char * -unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg) -{ - unw_word t; - - t = unw_decode_uleb128 (&dp); - UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg); - return dp; -} - -static unsigned char * -unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg) -{ - unw_word t, ecount, label; - - if ((code & 0x10) == 0) - { - t = unw_decode_uleb128 (&dp); - ecount = unw_decode_uleb128 (&dp); - UNW_DEC_EPILOGUE(B3, t, ecount, arg); - } - else if ((code & 0x07) == 0) - { - label = unw_decode_uleb128 (&dp); - if ((code & 0x08) != 0) - UNW_DEC_COPY_STATE(B4, label, arg); - else - UNW_DEC_LABEL_STATE(B4, label, arg); - } - else - switch (code & 0x7) - { - case 1: return unw_decode_x1 (dp, code, arg); - case 2: return unw_decode_x2 (dp, code, arg); - case 3: return unw_decode_x3 (dp, code, arg); - case 4: return unw_decode_x4 (dp, code, arg); - default: UNW_DEC_BAD_CODE(code); break; - } - return dp; -} - -typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *); - -static unw_decoder unw_decode_table[2][8] = -{ - /* prologue table: */ - { - unw_decode_r1, /* 0 */ - unw_decode_r1, - unw_decode_r2, - unw_decode_r3, - unw_decode_p1, /* 4 */ - unw_decode_p2_p5, - unw_decode_p6, - unw_decode_p7_p10 - }, - { - unw_decode_r1, /* 0 */ - unw_decode_r1, - unw_decode_r2, - unw_decode_r3, - unw_decode_b1, /* 4 */ - unw_decode_b1, - unw_decode_b2, - unw_decode_b3_x4 - } -}; - -/* - * Decode one descriptor and return address of next descriptor. 
- */ -static inline unsigned char * -unw_decode (unsigned char *dp, int inside_body, void *arg) -{ - unw_decoder decoder; - unsigned char code; - - code = *dp++; - decoder = unw_decode_table[inside_body][code >> 5]; - dp = (*decoder) (dp, code, arg); - return dp; -} diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h deleted file mode 100644 index 1dd57ba44327..000000000000 --- a/arch/ia64/kernel/unwind_i.h +++ /dev/null @@ -1,165 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * Kernel unwind support. - */ - -#define UNW_VER(x) ((x) >> 48) -#define UNW_FLAG_MASK 0x0000ffff00000000 -#define UNW_FLAG_OSMASK 0x0000f00000000000 -#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L) -#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L) -#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL) - -enum unw_register_index { - /* primary unat: */ - UNW_REG_PRI_UNAT_GR, - UNW_REG_PRI_UNAT_MEM, - - /* register stack */ - UNW_REG_BSP, /* register stack pointer */ - UNW_REG_BSPSTORE, - UNW_REG_PFS, /* previous function state */ - UNW_REG_RNAT, - /* memory stack */ - UNW_REG_PSP, /* previous memory stack pointer */ - /* return pointer: */ - UNW_REG_RP, - - /* preserved registers: */ - UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7, - UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR, - UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5, - UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5, - UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19, - UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23, - UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27, - UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31, - UNW_NUM_REGS -}; - -struct unw_info_block { - u64 header; - u64 desc[]; /* unwind descriptors */ - /* personality routine and language-specific data follow behind descriptors */ -}; - -struct unw_table { - struct unw_table *next; /* must be first member! 
*/ - const char *name; - unsigned long gp; /* global pointer for this load-module */ - unsigned long segment_base; /* base for offsets in the unwind table entries */ - unsigned long start; - unsigned long end; - const struct unw_table_entry *array; - unsigned long length; -}; - -enum unw_where { - UNW_WHERE_NONE, /* register isn't saved at all */ - UNW_WHERE_GR, /* register is saved in a general register */ - UNW_WHERE_FR, /* register is saved in a floating-point register */ - UNW_WHERE_BR, /* register is saved in a branch register */ - UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */ - UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */ - /* - * At the end of each prologue these locations get resolved to - * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively: - */ - UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */ - UNW_WHERE_GR_SAVE /* register is saved in next general register */ -}; - -#define UNW_WHEN_NEVER 0x7fffffff - -struct unw_reg_info { - unsigned long val; /* save location: register number or offset */ - enum unw_where where; /* where the register gets saved */ - int when; /* when the register gets saved */ -}; - -struct unw_reg_state { - struct unw_reg_state *next; /* next (outer) element on state stack */ - struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations */ -}; - -struct unw_labeled_state { - struct unw_labeled_state *next; /* next labeled state (or NULL) */ - unsigned long label; /* label for this state */ - struct unw_reg_state saved_state; -}; - -struct unw_state_record { - unsigned int first_region : 1; /* is this the first region? */ - unsigned int done : 1; /* are we done scanning descriptors? */ - unsigned int any_spills : 1; /* got any register spills? */ - unsigned int in_body : 1; /* are we inside a body (as opposed to a prologue)? 
*/ - unsigned long flags; /* see UNW_FLAG_* in unwind.h */ - - u8 *imask; /* imask of spill_mask record or NULL */ - unsigned long pr_val; /* predicate values */ - unsigned long pr_mask; /* predicate mask */ - long spill_offset; /* psp-relative offset for spill base */ - int region_start; - int region_len; - int epilogue_start; - int epilogue_count; - int when_target; - - u8 gr_save_loc; /* next general register to use for saving a register */ - u8 return_link_reg; /* branch register in which the return link is passed */ - - struct unw_labeled_state *labeled_states; /* list of all labeled states */ - struct unw_reg_state curr; /* current state */ -}; - -enum unw_nat_type { - UNW_NAT_NONE, /* NaT not represented */ - UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */ - UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */ - UNW_NAT_REGSTK /* NaT is in rnat */ -}; - -enum unw_insn_opcode { - UNW_INSN_ADD, /* s[dst] += val */ - UNW_INSN_ADD_PSP, /* s[dst] = (s.psp + val) */ - UNW_INSN_ADD_SP, /* s[dst] = (s.sp + val) */ - UNW_INSN_MOVE, /* s[dst] = s[val] */ - UNW_INSN_MOVE2, /* s[dst] = s[val]; s[dst+1] = s[val+1] */ - UNW_INSN_MOVE_STACKED, /* s[dst] = ia64_rse_skip(*s.bsp, val) */ - UNW_INSN_SETNAT_MEMSTK, /* s[dst+1].nat.type = MEMSTK; - s[dst+1].nat.off = *s.pri_unat - s[dst] */ - UNW_INSN_SETNAT_TYPE, /* s[dst+1].nat.type = val */ - UNW_INSN_LOAD, /* s[dst] = *s[val] */ - UNW_INSN_MOVE_SCRATCH, /* s[dst] = scratch reg "val" */ - UNW_INSN_MOVE_CONST, /* s[dst] = constant reg "val" */ -}; - -struct unw_insn { - unsigned int opc : 4; - unsigned int dst : 9; - signed int val : 19; -}; - -/* - * Preserved general static registers (r4-r7) give rise to two script - * instructions; everything else yields at most one instruction; at - * the end of the script, the psp gets popped, accounting for one more - * instruction. 
- */ -#define UNW_MAX_SCRIPT_LEN (UNW_NUM_REGS + 5) - -struct unw_script { - unsigned long ip; /* ip this script is for */ - unsigned long pr_mask; /* mask of predicates script depends on */ - unsigned long pr_val; /* predicate values this script is for */ - rwlock_t lock; - unsigned int flags; /* see UNW_FLAG_* in unwind.h */ - unsigned short lru_chain; /* used for least-recently-used chain */ - unsigned short coll_chain; /* used for hash collisions */ - unsigned short hint; /* hint for next script to try (or -1) */ - unsigned short count; /* number of instructions in script */ - struct unw_insn insn[UNW_MAX_SCRIPT_LEN]; -}; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S deleted file mode 100644 index 53dfde161c8a..000000000000 --- a/arch/ia64/kernel/vmlinux.lds.S +++ /dev/null @@ -1,224 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include -#include -#include - -#define EMITS_PT_NOTE -#define RO_EXCEPTION_TABLE_ALIGN 16 - -#include - -OUTPUT_FORMAT("elf64-ia64-little") -OUTPUT_ARCH(ia64) -ENTRY(phys_start) -jiffies = jiffies_64; - -PHDRS { - text PT_LOAD; - percpu PT_LOAD; - data PT_LOAD; - note PT_NOTE; - unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */ -} - -SECTIONS { - /* - * unwind exit sections must be discarded before - * the rest of the sections get included. - */ - /DISCARD/ : { - *(.IA_64.unwind.exit.text) - *(.IA_64.unwind_info.exit.text) - *(.comment) - *(.note) - } - - v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ - phys_start = _start - LOAD_OFFSET; - - code : { - } :text - . 
= KERNEL_START; - - _text = .; - _stext = .; - - .text : AT(ADDR(.text) - LOAD_OFFSET) { - __start_ivt_text = .; - *(.text..ivt) - __end_ivt_text = .; - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - IRQENTRY_TEXT - SOFTIRQENTRY_TEXT - *(.gnu.linkonce.t*) - } - - .text2 : AT(ADDR(.text2) - LOAD_OFFSET) { - *(.text2) - } - -#ifdef CONFIG_SMP - .text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) { - *(.text..lock) - } -#endif - _etext = .; - - /* - * Read-only data - */ - - /* MCA table */ - . = ALIGN(16); - __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) { - __start___mca_table = .; - *(__mca_table) - __stop___mca_table = .; - } - - .data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) { - __start___phys_stack_reg_patchlist = .; - *(.data..patch.phys_stack_reg) - __end___phys_stack_reg_patchlist = .; - } - - /* - * Global data - */ - _data = .; - - /* Unwind info & table: */ - . = ALIGN(8); - .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) { - *(.IA_64.unwind_info*) - } - .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) { - __start_unwind = .; - *(.IA_64.unwind*) - __end_unwind = .; - } :text :unwind - code_continues2 : { - } :text - - RO_DATA(4096) - - .opd : AT(ADDR(.opd) - LOAD_OFFSET) { - __start_opd = .; - *(.opd) - __end_opd = .; - } - - /* - * Initialization code and data: - */ - . = ALIGN(PAGE_SIZE); - __init_begin = .; - - INIT_TEXT_SECTION(PAGE_SIZE) - INIT_DATA_SECTION(16) - - .data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) { - __start___vtop_patchlist = .; - *(.data..patch.vtop) - __end___vtop_patchlist = .; - } - - .data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) { - __start___rse_patchlist = .; - *(.data..patch.rse) - __end___rse_patchlist = .; - } - - .data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) { - __start___mckinley_e9_bundles = .; - *(.data..patch.mckinley_e9) - __end___mckinley_e9_bundles = .; - } - -#ifdef CONFIG_SMP - . 
= ALIGN(PERCPU_PAGE_SIZE); - __cpu0_per_cpu = .; - . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */ -#endif - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) { - PAGE_ALIGNED_DATA(PAGE_SIZE) - . = ALIGN(PAGE_SIZE); - __start_gate_section = .; - *(.data..gate) - __stop_gate_section = .; - } - /* - * make sure the gate page doesn't expose - * kernel data - */ - . = ALIGN(PAGE_SIZE); - - /* Per-cpu data: */ - . = ALIGN(PERCPU_PAGE_SIZE); - PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu) - __phys_per_cpu_start = __per_cpu_load; - /* - * ensure percpu data fits - * into percpu page size - */ - . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; - - data : { - } :data - .data : AT(ADDR(.data) - LOAD_OFFSET) { - _sdata = .; - INIT_TASK_DATA(PAGE_SIZE) - CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES) - READ_MOSTLY_DATA(SMP_CACHE_BYTES) - DATA_DATA - *(.data1) - *(.gnu.linkonce.d*) - CONSTRUCTORS - } - - BUG_TABLE - - . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ - .got : AT(ADDR(.got) - LOAD_OFFSET) { - *(.got.plt) - *(.got) - } - __gp = ADDR(.got) + 0x200000; - - /* - * We want the small data sections together, - * so single-instruction offsets can access - * them all, and initialized data all before - * uninitialized, so we can shorten the - * on-disk segment size. - */ - .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) { - *(.sdata) - *(.sdata1) - *(.srdata) - } - _edata = .; - - BSS_SECTION(0, 0, 0) - - _end = .; - - code : { - } :text - - STABS_DEBUG - DWARF_DEBUG - ELF_DETAILS - - /* Default discards */ - DISCARDS -} diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile deleted file mode 100644 index 081fcba01dc0..000000000000 --- a/arch/ia64/lib/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for ia64-specific library routines.. 
-# - -lib-y := io.o __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ - __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ - checksum.o clear_page.o csum_partial_copy.o \ - clear_user.o strncpy_from_user.o strnlen_user.o \ - flush.o ip_fast_csum.o do_csum.o \ - memset.o strlen.o xor.o - -lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o -lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o - -AFLAGS___divdi3.o = -AFLAGS___udivdi3.o = -DUNSIGNED -AFLAGS___moddi3.o = -DMODULO -AFLAGS___umoddi3.o = -DUNSIGNED -DMODULO - -AFLAGS___divsi3.o = -AFLAGS___udivsi3.o = -DUNSIGNED -AFLAGS___modsi3.o = -DMODULO -AFLAGS___umodsi3.o = -DUNSIGNED -DMODULO - -$(obj)/__divdi3.o: $(src)/idiv64.S FORCE - $(call if_changed_rule,as_o_S) - -$(obj)/__udivdi3.o: $(src)/idiv64.S FORCE - $(call if_changed_rule,as_o_S) - -$(obj)/__moddi3.o: $(src)/idiv64.S FORCE - $(call if_changed_rule,as_o_S) - -$(obj)/__umoddi3.o: $(src)/idiv64.S FORCE - $(call if_changed_rule,as_o_S) - -$(obj)/__divsi3.o: $(src)/idiv32.S FORCE - $(call if_changed_rule,as_o_S) - -$(obj)/__udivsi3.o: $(src)/idiv32.S FORCE - $(call if_changed_rule,as_o_S) - -$(obj)/__modsi3.o: $(src)/idiv32.S FORCE - $(call if_changed_rule,as_o_S) - -$(obj)/__umodsi3.o: $(src)/idiv32.S FORCE - $(call if_changed_rule,as_o_S) diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c deleted file mode 100644 index d26517fe3500..000000000000 --- a/arch/ia64/lib/checksum.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Network checksum routines - * - * Copyright (C) 1999, 2003 Hewlett-Packard Co - * Stephane Eranian - * - * Most of the code coming from arch/alpha/lib/checksum.c - * - * This file contains network checksum routines that are better done - * in an architecture-specific manner due to speed.. 
- */ - -#include -#include - -#include - -static inline unsigned short -from64to16 (unsigned long x) -{ - /* add up 32-bit words for 33 bits */ - x = (x & 0xffffffff) + (x >> 32); - /* add up 16-bit and 17-bit words for 17+c bits */ - x = (x & 0xffff) + (x >> 16); - /* add up 16-bit and 2-bit for 16+c bit */ - x = (x & 0xffff) + (x >> 16); - /* add up carry.. */ - x = (x & 0xffff) + (x >> 16); - return x; -} - -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented. - */ -__sum16 -csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, - __u8 proto, __wsum sum) -{ - return (__force __sum16)~from64to16( - (__force u64)saddr + (__force u64)daddr + - (__force u64)sum + ((len + proto) << 8)); -} - -EXPORT_SYMBOL(csum_tcpudp_magic); - -__wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, - __u8 proto, __wsum sum) -{ - unsigned long result; - - result = (__force u64)saddr + (__force u64)daddr + - (__force u64)sum + ((len + proto) << 8); - - /* Fold down to 32-bits so we don't lose in the typedef-less network stack. */ - /* 64 to 33 */ - result = (result & 0xffffffff) + (result >> 32); - /* 33 to 32 */ - result = (result & 0xffffffff) + (result >> 32); - return (__force __wsum)result; -} -EXPORT_SYMBOL(csum_tcpudp_nofold); - -extern unsigned long do_csum (const unsigned char *, long); - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -__wsum csum_partial(const void *buff, int len, __wsum sum) -{ - u64 result = do_csum(buff, len); - - /* add in old sum, and carry.. 
*/ - result += (__force u32)sum; - /* 32+c bits -> 32 bits */ - result = (result & 0xffffffff) + (result >> 32); - return (__force __wsum)result; -} - -EXPORT_SYMBOL(csum_partial); - -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ -__sum16 ip_compute_csum (const void *buff, int len) -{ - return (__force __sum16)~do_csum(buff,len); -} - -EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S deleted file mode 100644 index ba0dd2538fa5..000000000000 --- a/arch/ia64/lib/clear_page.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1999-2002 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger-Tang - * Copyright (C) 2002 Ken Chen - * - * 1/06/01 davidm Tuned for Itanium. - * 2/12/02 kchen Tuned for both Itanium and McKinley - * 3/08/02 davidm Some more tweaking - */ - -#include -#include -#include - -#ifdef CONFIG_ITANIUM -# define L3_LINE_SIZE 64 // Itanium L3 line size -# define PREFETCH_LINES 9 // magic number -#else -# define L3_LINE_SIZE 128 // McKinley L3 line size -# define PREFETCH_LINES 12 // magic number -#endif - -#define saved_lc r2 -#define dst_fetch r3 -#define dst1 r8 -#define dst2 r9 -#define dst3 r10 -#define dst4 r11 - -#define dst_last r31 - -GLOBAL_ENTRY(clear_page) - .prologue - .regstk 1,0,0,0 - mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until - .save ar.lc, saved_lc - mov saved_lc = ar.lc - - .body - mov ar.lc = (PREFETCH_LINES - 1) - mov dst_fetch = in0 - adds dst1 = 16, in0 - adds dst2 = 32, in0 - ;; -.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE - adds dst3 = 48, in0 // executing this multiple times is harmless - br.cloop.sptk.few .fetch - ;; - addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch - mov ar.lc = r16 // one L3 line per iteration - adds dst4 = 64, in0 - ;; -#ifdef CONFIG_ITANIUM - // Optimized for Itanium -1: stf.spill.nta [dst1] = f0, 64 - 
stf.spill.nta [dst2] = f0, 64 - cmp.lt p8,p0=dst_fetch, dst_last - ;; -#else - // Optimized for McKinley -1: stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 - stf.spill.nta [dst3] = f0, 64 - stf.spill.nta [dst4] = f0, 128 - cmp.lt p8,p0=dst_fetch, dst_last - ;; - stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 -#endif - stf.spill.nta [dst3] = f0, 64 -(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE - br.cloop.sptk.few 1b - ;; - mov ar.lc = saved_lc // restore lc - br.ret.sptk.many rp -END(clear_page) -EXPORT_SYMBOL(clear_page) diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S deleted file mode 100644 index 1d9e45ccf8e5..000000000000 --- a/arch/ia64/lib/clear_user.S +++ /dev/null @@ -1,212 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This routine clears to zero a linear memory buffer in user space. - * - * Inputs: - * in0: address of buffer - * in1: length of buffer in bytes - * Outputs: - * r8: number of bytes that didn't get cleared due to a fault - * - * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co - * Stephane Eranian - */ - -#include -#include - -// -// arguments -// -#define buf r32 -#define len r33 - -// -// local registers -// -#define cnt r16 -#define buf2 r17 -#define saved_lc r18 -#define saved_pfs r19 -#define tmp r20 -#define len2 r21 -#define len3 r22 - -// -// Theory of operations: -// - we check whether or not the buffer is small, i.e., less than 17 -// in which case we do the byte by byte loop. -// -// - Otherwise we go progressively from 1 byte store to 8byte store in -// the head part, the body is a 16byte store loop and we finish we the -// tail for the last 15 bytes. -// The good point about this breakdown is that the long buffer handling -// contains only 2 branches. -// -// The reason for not using shifting & masking for both the head and the -// tail is to stay semantically correct. This routine is not supposed -// to write bytes outside of the buffer. 
While most of the time this would -// be ok, we can't tolerate a mistake. A classical example is the case -// of multithreaded code were to the extra bytes touched is actually owned -// by another thread which runs concurrently to ours. Another, less likely, -// example is with device drivers where reading an I/O mapped location may -// have side effects (same thing for writing). -// - -GLOBAL_ENTRY(__do_clear_user) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,2,0,0,0 - cmp.eq p6,p0=r0,len // check for zero length - .save ar.lc, saved_lc - mov saved_lc=ar.lc // preserve ar.lc (slow) - .body - ;; // avoid WAW on CFM - adds tmp=-1,len // br.ctop is repeat/until - mov ret0=len // return value is length at this point -(p6) br.ret.spnt.many rp - ;; - cmp.lt p6,p0=16,len // if len > 16 then long memset - mov ar.lc=tmp // initialize lc for small count -(p6) br.cond.dptk .long_do_clear - ;; // WAR on ar.lc - // - // worst case 16 iterations, avg 8 iterations - // - // We could have played with the predicates to use the extra - // M slot for 2 stores/iteration but the cost the initialization - // the various counters compared to how long the loop is supposed - // to last on average does not make this solution viable. - // -1: - EX( .Lexit1, st1 [buf]=r0,1 ) - adds len=-1,len // countdown length using len - br.cloop.dptk 1b - ;; // avoid RAW on ar.lc - // - // .Lexit4: comes from byte by byte loop - // len contains bytes left -.Lexit1: - mov ret0=len // faster than using ar.lc - mov ar.lc=saved_lc - br.ret.sptk.many rp // end of short clear_user - - - // - // At this point we know we have more than 16 bytes to copy - // so we focus on alignment (no branches required) - // - // The use of len/len2 for countdown of the number of bytes left - // instead of ret0 is due to the fact that the exception code - // changes the values of r8. 
- // -.long_do_clear: - tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear) - ;; - EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned -(p6) adds len=-1,len;; // sync because buf is modified - tbit.nz p6,p0=buf,1 - ;; - EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned -(p6) adds len=-2,len;; - tbit.nz p6,p0=buf,2 - ;; - EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned -(p6) adds len=-4,len;; - tbit.nz p6,p0=buf,3 - ;; - EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned -(p6) adds len=-8,len;; - shr.u cnt=len,4 // number of 128-bit (2x64bit) words - ;; - cmp.eq p6,p0=r0,cnt - adds tmp=-1,cnt -(p6) br.cond.dpnt .dotail // we have less than 16 bytes left - ;; - adds buf2=8,buf // setup second base pointer - mov ar.lc=tmp - ;; - - // - // 16bytes/iteration core loop - // - // The second store can never generate a fault because - // we come into the loop only when we are 16-byte aligned. - // This means that if we cross a page then it will always be - // in the first store and never in the second. - // - // - // We need to keep track of the remaining length. A possible (optimistic) - // way would be to use ar.lc and derive how many byte were left by - // doing : left= 16*ar.lc + 16. this would avoid the addition at - // every iteration. - // However we need to keep the synchronization point. A template - // M;;MB does not exist and thus we can keep the addition at no - // extra cycle cost (use a nop slot anyway). It also simplifies the - // (unlikely) error recovery code - // - -2: EX(.Lexit3, st8 [buf]=r0,16 ) - ;; // needed to get len correct when error - st8 [buf2]=r0,16 - adds len=-16,len - br.cloop.dptk 2b - ;; - mov ar.lc=saved_lc - // - // tail correction based on len only - // - // We alternate the use of len3,len2 to allow parallelism and correct - // error handling. We also reuse p6/p7 to return correct value. - // The addition of len2/len3 does not cost anything more compared to - // the regular memset as we had empty slots. 
- // -.dotail: - mov len2=len // for parallelization of error handling - mov len3=len - tbit.nz p6,p0=len,3 - ;; - EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes -(p6) adds len3=-8,len2 - tbit.nz p7,p6=len,2 - ;; - EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes -(p7) adds len2=-4,len3 - tbit.nz p6,p7=len,1 - ;; - EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes -(p6) adds len3=-2,len2 - tbit.nz p7,p6=len,0 - ;; - EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left - mov ret0=r0 // success - br.ret.sptk.many rp // end of most likely path - - // - // Outlined error handling code - // - - // - // .Lexit3: comes from core loop, need restore pr/lc - // len contains bytes left - // - // - // .Lexit2: - // if p6 -> coming from st8 or st2 : len2 contains what's left - // if p7 -> coming from st4 or st1 : len3 contains what's left - // We must restore lc/pr even though might not have been used. -.Lexit2: - .pred.rel "mutex", p6, p7 -(p6) mov len=len2 -(p7) mov len=len3 - ;; - // - // .Lexit4: comes from head, need not restore pr/lc - // len contains bytes left - // -.Lexit3: - mov ret0=len - mov ar.lc=saved_lc - br.ret.sptk.many rp -END(__do_clear_user) -EXPORT_SYMBOL(__do_clear_user) diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S deleted file mode 100644 index c0a0e6b2af00..000000000000 --- a/arch/ia64/lib/copy_page.S +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the standard copy_page() function - * - * Inputs: - * in0: address of target page - * in1: address of source page - * Output: - * no return value - * - * Copyright (C) 1999, 2001 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger - * - * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies. 
- */ -#include -#include -#include - -#define PIPE_DEPTH 3 -#define EPI p[PIPE_DEPTH-1] - -#define lcount r16 -#define saved_pr r17 -#define saved_lc r18 -#define saved_pfs r19 -#define src1 r20 -#define src2 r21 -#define tgt1 r22 -#define tgt2 r23 -#define srcf r24 -#define tgtf r25 -#define tgt_last r26 - -#define Nrot ((8*PIPE_DEPTH+7)&~7) - -GLOBAL_ENTRY(copy_page) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot - - .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \ - t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH] - .rotp p[PIPE_DEPTH] - - .save ar.lc, saved_lc - mov saved_lc=ar.lc - mov ar.ec=PIPE_DEPTH - - mov lcount=PAGE_SIZE/64-1 - .save pr, saved_pr - mov saved_pr=pr - mov pr.rot=1<<16 - - .body - - mov src1=in1 - adds src2=8,in1 - mov tgt_last = PAGE_SIZE - ;; - adds tgt2=8,in0 - add srcf=512,in1 - mov ar.lc=lcount - mov tgt1=in0 - add tgtf=512,in0 - add tgt_last = tgt_last, in0 - ;; -1: -(p[0]) ld8 t1[0]=[src1],16 -(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 -(p[0]) ld8 t2[0]=[src2],16 -(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 - cmp.ltu p6,p0 = tgtf, tgt_last - ;; -(p[0]) ld8 t3[0]=[src1],16 -(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16 -(p[0]) ld8 t4[0]=[src2],16 -(EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16 - ;; -(p[0]) ld8 t5[0]=[src1],16 -(EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16 -(p[0]) ld8 t6[0]=[src2],16 -(EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16 - ;; -(p[0]) ld8 t7[0]=[src1],16 -(EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16 -(p[0]) ld8 t8[0]=[src2],16 -(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16 - -(p6) lfetch [srcf], 64 -(p6) lfetch [tgtf], 64 - br.ctop.sptk.few 1b - ;; - mov pr=saved_pr,0xffffffffffff0000 // restore predicates - mov ar.pfs=saved_pfs - mov ar.lc=saved_lc - br.ret.sptk.many rp -END(copy_page) -EXPORT_SYMBOL(copy_page) diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S deleted file mode 100644 index 5e8bb4b4b535..000000000000 --- a/arch/ia64/lib/copy_page_mck.S +++ /dev/null @@ -1,188 +0,0 @@ 
-/* SPDX-License-Identifier: GPL-2.0 */ -/* - * McKinley-optimized version of copy_page(). - * - * Copyright (C) 2002 Hewlett-Packard Co - * David Mosberger - * - * Inputs: - * in0: address of target page - * in1: address of source page - * Output: - * no return value - * - * General idea: - * - use regular loads and stores to prefetch data to avoid consuming M-slot just for - * lfetches => good for in-cache performance - * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single - * cycle - * - * Principle of operation: - * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes. - * To avoid secondary misses in L2, we prefetch both source and destination with a line-size - * of 128 bytes. When both of these lines are in the L2 and the first half of the - * source line is in L1, we start copying the remaining words. The second half of the - * source line is prefetched in an earlier iteration, so that by the time we start - * accessing it, it's also present in the L1. - * - * We use a software-pipelined loop to control the overall operation. The pipeline - * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching - * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination - * cache-lines, the last K stages are used to copy the cache-line words not copied by - * the prefetches. The four relevant points in the pipelined are called A, B, C, D: - * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line - * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought - * into L1D and p[D] is TRUE if a cacheline needs to be copied. - * - * This all sounds very complicated, but thanks to the modulo-scheduled loop support, - * the resulting code is very regular and quite easy to follow (once you get the idea). 
- * - * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented - * as the separate .prefetch_loop. Logically, this loop performs exactly like the - * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed, - * so that each loop iteration is faster (again, good for cached case). - * - * When reading the code, it helps to keep the following picture in mind: - * - * word 0 word 1 - * +------+------+--- - * | v[x] | t1 | ^ - * | t2 | t3 | | - * | t4 | t5 | | - * | t6 | t7 | | 128 bytes - * | n[y] | t9 | | (L2 cache line) - * | t10 | t11 | | - * | t12 | t13 | | - * | t14 | t15 | v - * +------+------+--- - * - * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C] - * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in - * an order that avoids bank conflicts. - */ -#include -#include -#include - -#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st) - -#define src0 r2 -#define src1 r3 -#define dst0 r9 -#define dst1 r10 -#define src_pre_mem r11 -#define dst_pre_mem r14 -#define src_pre_l2 r15 -#define dst_pre_l2 r16 -#define t1 r17 -#define t2 r18 -#define t3 r19 -#define t4 r20 -#define t5 t1 // alias! -#define t6 t2 // alias! -#define t7 t3 // alias! -#define t9 t5 // alias! -#define t10 t4 // alias! -#define t11 t7 // alias! -#define t12 t6 // alias! -#define t14 t10 // alias! 
-#define t13 r21 -#define t15 r22 - -#define saved_lc r23 -#define saved_pr r24 - -#define A 0 -#define B (PREFETCH_DIST) -#define C (B + PREFETCH_DIST) -#define D (C + 3) -#define N (D + 1) -#define Nrot ((N + 7) & ~7) - -GLOBAL_ENTRY(copy_page) - .prologue - alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot - - .rotr v[2*PREFETCH_DIST], n[D-C+1] - .rotp p[N] - - .save ar.lc, saved_lc - mov saved_lc = ar.lc - .save pr, saved_pr - mov saved_pr = pr - .body - - mov src_pre_mem = in1 - mov pr.rot = 0x10000 - mov ar.ec = 1 // special unrolled loop - - mov dst_pre_mem = in0 - mov ar.lc = 2*PREFETCH_DIST - 1 - - add src_pre_l2 = 8*8, in1 - add dst_pre_l2 = 8*8, in0 - add src0 = 8, in1 // first t1 src - add src1 = 3*8, in1 // first t3 src - add dst0 = 8, in0 // first t1 dst - add dst1 = 3*8, in0 // first t3 dst - mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1 - nop.m 0 - nop.i 0 - ;; - // same as .line_copy loop, but with all predicated-off instructions removed: -.prefetch_loop: -(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 -(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 - br.ctop.sptk .prefetch_loop - ;; - cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero) - mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits! 
- mov ar.ec = N // # of stages in pipeline - ;; -.line_copy: -(p[D]) ld8 t2 = [src0], 3*8 // M0 -(p[D]) ld8 t4 = [src1], 3*8 // M1 -(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory -(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2 - ;; -(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory -(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2 -(p[D]) st8 [dst0] = t1, 8 // M2 -(p[D]) st8 [dst1] = t3, 8 // M3 - ;; -(p[D]) ld8 t5 = [src0], 8 -(p[D]) ld8 t7 = [src1], 3*8 -(p[D]) st8 [dst0] = t2, 3*8 -(p[D]) st8 [dst1] = t4, 3*8 - ;; -(p[D]) ld8 t6 = [src0], 3*8 -(p[D]) ld8 t10 = [src1], 8 -(p[D]) st8 [dst0] = t5, 8 -(p[D]) st8 [dst1] = t7, 3*8 - ;; -(p[D]) ld8 t9 = [src0], 3*8 -(p[D]) ld8 t11 = [src1], 3*8 -(p[D]) st8 [dst0] = t6, 3*8 -(p[D]) st8 [dst1] = t10, 8 - ;; -(p[D]) ld8 t12 = [src0], 8 -(p[D]) ld8 t14 = [src1], 8 -(p[D]) st8 [dst0] = t9, 3*8 -(p[D]) st8 [dst1] = t11, 3*8 - ;; -(p[D]) ld8 t13 = [src0], 4*8 -(p[D]) ld8 t15 = [src1], 4*8 -(p[D]) st8 [dst0] = t12, 8 -(p[D]) st8 [dst1] = t14, 8 - ;; -(p[D-1])ld8 t1 = [src0], 8 -(p[D-1])ld8 t3 = [src1], 8 -(p[D]) st8 [dst0] = t13, 4*8 -(p[D]) st8 [dst1] = t15, 4*8 - br.ctop.sptk .line_copy - ;; - mov ar.lc = saved_lc - mov pr = saved_pr, -1 - br.ret.sptk.many rp -END(copy_page) -EXPORT_SYMBOL(copy_page) diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S deleted file mode 100644 index 8daab72cfe77..000000000000 --- a/arch/ia64/lib/copy_user.S +++ /dev/null @@ -1,613 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the copy_user() routine. - * It is used to copy date across the kernel/user boundary. - * - * The source and destination are always on opposite side of - * the boundary. When reading from user space we must catch - * faults on loads. When writing to user space we must catch - * errors on stores. Note that because of the nature of the copy - * we don't need to worry about overlapping regions. 
- * - * - * Inputs: - * in0 address of source buffer - * in1 address of destination buffer - * in2 number of bytes to copy - * - * Outputs: - * ret0 0 in case of success. The number of bytes NOT copied in - * case of error. - * - * Copyright (C) 2000-2001 Hewlett-Packard Co - * Stephane Eranian - * - * Fixme: - * - handle the case where we have more than 16 bytes and the alignment - * are different. - * - more benchmarking - * - fix extraneous stop bit introduced by the EX() macro. - */ - -#include -#include - -// -// Tuneable parameters -// -#define COPY_BREAK 16 // we do byte copy below (must be >=16) -#define PIPE_DEPTH 21 // pipe depth - -#define EPI p[PIPE_DEPTH-1] - -// -// arguments -// -#define dst in0 -#define src in1 -#define len in2 - -// -// local registers -// -#define t1 r2 // rshift in bytes -#define t2 r3 // lshift in bytes -#define rshift r14 // right shift in bits -#define lshift r15 // left shift in bits -#define word1 r16 -#define word2 r17 -#define cnt r18 -#define len2 r19 -#define saved_lc r20 -#define saved_pr r21 -#define tmp r22 -#define val r23 -#define src1 r24 -#define dst1 r25 -#define src2 r26 -#define dst2 r27 -#define len1 r28 -#define enddst r29 -#define endsrc r30 -#define saved_pfs r31 - -GLOBAL_ENTRY(__copy_user) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7) - - .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH] - .rotp p[PIPE_DEPTH] - - adds len2=-1,len // br.ctop is repeat/until - mov ret0=r0 - - ;; // RAW of cfm when len=0 - cmp.eq p8,p0=r0,len // check for zero length - .save ar.lc, saved_lc - mov saved_lc=ar.lc // preserve ar.lc (slow) -(p8) br.ret.spnt.many rp // empty mempcy() - ;; - add enddst=dst,len // first byte after end of source - add endsrc=src,len // first byte after end of destination - .save pr, saved_pr - mov saved_pr=pr // preserve predicates - - .body - - mov dst1=dst // copy because of rotation - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all 
others are false - - mov src1=src // copy because of rotation - mov ar.lc=len2 // initialize lc for small count - cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy - - xor tmp=src,dst // same alignment test prepare -(p10) br.cond.dptk .long_copy_user - ;; // RAW pr.rot/p16 ? - // - // Now we do the byte by byte loop with software pipeline - // - // p7 is necessarily false by now -1: - EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) - EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) - br.ctop.dptk.few 1b - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,0xffffffffffff0000 - mov ar.pfs=saved_pfs // restore ar.ec - br.ret.sptk.many rp // end of short memcpy - - // - // Not 8-byte aligned - // -.diff_align_copy_user: - // At this point we know we have more than 16 bytes to copy - // and also that src and dest do _not_ have the same alignment. - and src2=0x7,src1 // src offset - and dst2=0x7,dst1 // dst offset - ;; - // The basic idea is that we copy byte-by-byte at the head so - // that we can reach 8-byte alignment for both src1 and dst1. - // Then copy the body using software pipelined 8-byte copy, - // shifting the two back-to-back words right and left, then copy - // the tail by copying byte-by-byte. - // - // Fault handling. If the byte-by-byte at the head fails on the - // load, then restart and finish the pipleline by copying zeros - // to the dst1. Then copy zeros for the rest of dst1. - // If 8-byte software pipeline fails on the load, do the same as - // failure_in3 does. If the byte-by-byte at the tail fails, it is - // handled simply by failure_in_pipe1. - // - // The case p14 represents the source has more bytes in the - // the first word (by the shifted part), whereas the p15 needs to - // copy some bytes from the 2nd word of the source that has the - // tail of the 1st of the destination. - // - - // - // Optimization. 
If dst1 is 8-byte aligned (quite common), we don't need - // to copy the head to dst1, to start 8-byte copy software pipeline. - // We know src1 is not 8-byte aligned in this case. - // - cmp.eq p14,p15=r0,dst2 -(p15) br.cond.spnt 1f - ;; - sub t1=8,src2 - mov t2=src2 - ;; - shl rshift=t2,3 - sub len1=len,t1 // set len1 - ;; - sub lshift=64,rshift - ;; - br.cond.spnt .word_copy_user - ;; -1: - cmp.leu p14,p15=src2,dst2 - sub t1=dst2,src2 - ;; - .pred.rel "mutex", p14, p15 -(p14) sub word1=8,src2 // (8 - src offset) -(p15) sub t1=r0,t1 // absolute value -(p15) sub word1=8,dst2 // (8 - dst offset) - ;; - // For the case p14, we don't need to copy the shifted part to - // the 1st word of destination. - sub t2=8,t1 -(p14) sub word1=word1,t1 - ;; - sub len1=len,word1 // resulting len -(p15) shl rshift=t1,3 // in bits -(p14) shl rshift=t2,3 - ;; -(p14) sub len1=len1,t1 - adds cnt=-1,word1 - ;; - sub lshift=64,rshift - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - mov ar.lc=cnt - ;; -2: - EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1) - EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) - br.ctop.dptk.few 2b - ;; - clrrrb - ;; -.word_copy_user: - cmp.gtu p9,p0=16,len1 -(p9) br.cond.spnt 4f // if (16 > len1) skip 8-byte copy - ;; - shr.u cnt=len1,3 // number of 64-bit words - ;; - adds cnt=-1,cnt - ;; - .pred.rel "mutex", p14, p15 -(p14) sub src1=src1,t2 -(p15) sub src1=src1,t1 - // - // Now both src1 and dst1 point to an 8-byte aligned address. And - // we have more than 8 bytes to copy. - // - mov ar.lc=cnt - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - ;; -3: - // - // The pipleline consists of 3 stages: - // 1 (p16): Load a word from src1 - // 2 (EPI_1): Shift right pair, saving to tmp - // 3 (EPI): Store tmp to dst1 - // - // To make it simple, use at least 2 (p16) loops to set up val1[n] - // because we need 2 back-to-back val1[] to get tmp. - // Note that this implies EPI_2 must be p18 or greater. 
- // - -#define EPI_1 p[PIPE_DEPTH-2] -#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift -#define CASE(pred, shift) \ - (pred) br.cond.spnt .copy_user_bit##shift -#define BODY(rshift) \ -.copy_user_bit##rshift: \ -1: \ - EX(.failure_out,(EPI) st8 [dst1]=tmp,8); \ -(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ - EX(3f,(p16) ld8 val1[1]=[src1],8); \ -(p16) mov val1[0]=r0; \ - br.ctop.dptk 1b; \ - ;; \ - br.cond.sptk.many .diff_align_do_tail; \ -2: \ -(EPI) st8 [dst1]=tmp,8; \ -(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ -3: \ -(p16) mov val1[1]=r0; \ -(p16) mov val1[0]=r0; \ - br.ctop.dptk 2b; \ - ;; \ - br.cond.sptk.many .failure_in2 - - // - // Since the instruction 'shrp' requires a fixed 128-bit value - // specifying the bits to shift, we need to provide 7 cases - // below. - // - SWITCH(p6, 8) - SWITCH(p7, 16) - SWITCH(p8, 24) - SWITCH(p9, 32) - SWITCH(p10, 40) - SWITCH(p11, 48) - SWITCH(p12, 56) - ;; - CASE(p6, 8) - CASE(p7, 16) - CASE(p8, 24) - CASE(p9, 32) - CASE(p10, 40) - CASE(p11, 48) - CASE(p12, 56) - ;; - BODY(8) - BODY(16) - BODY(24) - BODY(32) - BODY(40) - BODY(48) - BODY(56) - ;; -.diff_align_do_tail: - .pred.rel "mutex", p14, p15 -(p14) sub src1=src1,t1 -(p14) adds dst1=-8,dst1 -(p15) sub dst1=dst1,t1 - ;; -4: - // Tail correction. - // - // The problem with this piplelined loop is that the last word is not - // loaded and thus parf of the last word written is not correct. - // To fix that, we simply copy the tail byte by byte. - - sub len1=endsrc,src1,1 - clrrrb - ;; - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - mov ar.lc=len1 - ;; -5: - EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) - EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) - br.ctop.dptk.few 5b - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,0xffffffffffff0000 - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // Beginning of long mempcy (i.e. 
> 16 bytes) - // -.long_copy_user: - tbit.nz p6,p7=src1,0 // odd alignment - and tmp=7,tmp - ;; - cmp.eq p10,p8=r0,tmp - mov len1=len // copy because of rotation -(p8) br.cond.dpnt .diff_align_copy_user - ;; - // At this point we know we have more than 16 bytes to copy - // and also that both src and dest have the same alignment - // which may not be the one we want. So for now we must move - // forward slowly until we reach 16byte alignment: no need to - // worry about reaching the end of buffer. - // - EX(.failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned -(p6) adds len1=-1,len1;; - tbit.nz p7,p0=src1,1 - ;; - EX(.failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned -(p7) adds len1=-2,len1;; - tbit.nz p8,p0=src1,2 - ;; - // - // Stop bit not required after ld4 because if we fail on ld4 - // we have never executed the ld1, therefore st1 is not executed. - // - EX(.failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned - ;; - EX(.failure_out,(p6) st1 [dst1]=val1[0],1) - tbit.nz p9,p0=src1,3 - ;; - // - // Stop bit not required after ld8 because if we fail on ld8 - // we have never executed the ld2, therefore st2 is not executed. 
- // - EX(.failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned - EX(.failure_out,(p7) st2 [dst1]=val1[1],2) -(p8) adds len1=-4,len1 - ;; - EX(.failure_out, (p8) st4 [dst1]=val2[0],4) -(p9) adds len1=-8,len1;; - shr.u cnt=len1,4 // number of 128-bit (2x64bit) words - ;; - EX(.failure_out, (p9) st8 [dst1]=val2[1],8) - tbit.nz p6,p0=len1,3 - cmp.eq p7,p0=r0,cnt - adds tmp=-1,cnt // br.ctop is repeat/until -(p7) br.cond.dpnt .dotail // we have less than 16 bytes left - ;; - adds src2=8,src1 - adds dst2=8,dst1 - mov ar.lc=tmp - ;; - // - // 16bytes/iteration - // -2: - EX(.failure_in3,(p16) ld8 val1[0]=[src1],16) -(p16) ld8 val2[0]=[src2],16 - - EX(.failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16) -(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 - br.ctop.dptk 2b - ;; // RAW on src1 when fall through from loop - // - // Tail correction based on len only - // - // No matter where we come from (loop or test) the src1 pointer - // is 16 byte aligned AND we have less than 16 bytes to copy. - // -.dotail: - EX(.failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes - tbit.nz p7,p0=len1,2 - ;; - EX(.failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes - tbit.nz p8,p0=len1,1 - ;; - EX(.failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes - tbit.nz p9,p0=len1,0 - ;; - EX(.failure_out, (p6) st8 [dst1]=val1[0],8) - ;; - EX(.failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left - mov ar.lc=saved_lc - ;; - EX(.failure_out,(p7) st4 [dst1]=val1[1],4) - mov pr=saved_pr,0xffffffffffff0000 - ;; - EX(.failure_out, (p8) st2 [dst1]=val2[0],2) - mov ar.pfs=saved_pfs - ;; - EX(.failure_out, (p9) st1 [dst1]=val2[1]) - br.ret.sptk.many rp - - - // - // Here we handle the case where the byte by byte copy fails - // on the load. 
- // Several factors make the zeroing of the rest of the buffer kind of - // tricky: - // - the pipeline: loads/stores are not in sync (pipeline) - // - // In the same loop iteration, the dst1 pointer does not directly - // reflect where the faulty load was. - // - // - pipeline effect - // When you get a fault on load, you may have valid data from - // previous loads not yet store in transit. Such data must be - // store normally before moving onto zeroing the rest. - // - // - single/multi dispersal independence. - // - // solution: - // - we don't disrupt the pipeline, i.e. data in transit in - // the software pipeline will be eventually move to memory. - // We simply replace the load with a simple mov and keep the - // pipeline going. We can't really do this inline because - // p16 is always reset to 1 when lc > 0. - // -.failure_in_pipe1: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied -1: -(p16) mov val1[0]=r0 -(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 - br.ctop.dptk 1b - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // This is the case where the byte by byte copy fails on the load - // when we copy the head. We need to finish the pipeline and copy - // zeros for the rest of the destination. Since this happens - // at the top we still need to fill the body and tail. -.failure_in_pipe2: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied -2: -(p16) mov val1[0]=r0 -(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 - br.ctop.dptk 2b - ;; - sub len=enddst,dst1,1 // precompute len - br.cond.dptk.many .failure_in1bis - ;; - - // - // Here we handle the head & tail part when we check for alignment. - // The following code handles only the load failures. The - // main diffculty comes from the fact that loads/stores are - // scheduled. So when you fail on a load, the stores corresponding - // to previous successful loads must be executed. 
- // - // However some simplifications are possible given the way - // things work. - // - // 1) HEAD - // Theory of operation: - // - // Page A | Page B - // ---------|----- - // 1|8 x - // 1 2|8 x - // 4|8 x - // 1 4|8 x - // 2 4|8 x - // 1 2 4|8 x - // |1 - // |2 x - // |4 x - // - // page_size >= 4k (2^12). (x means 4, 2, 1) - // Here we suppose Page A exists and Page B does not. - // - // As we move towards eight byte alignment we may encounter faults. - // The numbers on each page show the size of the load (current alignment). - // - // Key point: - // - if you fail on 1, 2, 4 then you have never executed any smaller - // size loads, e.g. failing ld4 means no ld1 nor ld2 executed - // before. - // - // This allows us to simplify the cleanup code, because basically you - // only have to worry about "pending" stores in the case of a failing - // ld8(). Given the way the code is written today, this means only - // worry about st2, st4. There we can use the information encapsulated - // into the predicates. - // - // Other key point: - // - if you fail on the ld8 in the head, it means you went straight - // to it, i.e. 8byte alignment within an unexisting page. - // Again this comes from the fact that if you crossed just for the ld8 then - // you are 8byte aligned but also 16byte align, therefore you would - // either go for the 16byte copy loop OR the ld8 in the tail part. - // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible - // because it would mean you had 15bytes to copy in which case you - // would have defaulted to the byte by byte copy. - // - // - // 2) TAIL - // Here we now we have less than 16 bytes AND we are either 8 or 16 byte - // aligned. - // - // Key point: - // This means that we either: - // - are right on a page boundary - // OR - // - are at more than 16 bytes from a page boundary with - // at most 15 bytes to copy: no chance of crossing. 
- // - // This allows us to assume that if we fail on a load we haven't possibly - // executed any of the previous (tail) ones, so we don't need to do - // any stores. For instance, if we fail on ld2, this means we had - // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4. - // - // This means that we are in a situation similar the a fault in the - // head part. That's nice! - // -.failure_in1: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied - sub len=endsrc,src1,1 - // - // we know that ret0 can never be zero at this point - // because we failed why trying to do a load, i.e. there is still - // some work to do. - // The failure_in1bis and length problem is taken care of at the - // calling side. - // - ;; -.failure_in1bis: // from (.failure_in3) - mov ar.lc=len // Continue with a stupid byte store. - ;; -5: - st1 [dst1]=r0,1 - br.cloop.dptk 5b - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // Here we simply restart the loop but instead - // of doing loads we fill the pipeline with zeroes - // We can't simply store r0 because we may have valid - // data in transit in the pipeline. - // ar.lc and ar.ec are setup correctly at this point - // - // we MUST use src1/endsrc here and not dst1/enddst because - // of the pipeline effect. - // -.failure_in3: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied - ;; -2: -(p16) mov val1[0]=r0 -(p16) mov val2[0]=r0 -(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16 -(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 - br.ctop.dptk 2b - ;; - cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? - sub len=enddst,dst1,1 // precompute len -(p6) br.cond.dptk .failure_in1bis - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - -.failure_in2: - sub ret0=endsrc,src1 - cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? 
- sub len=enddst,dst1,1 // precompute len -(p6) br.cond.dptk .failure_in1bis - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // handling of failures on stores: that's the easy part - // -.failure_out: - sub ret0=enddst,dst1 - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - - mov ar.pfs=saved_pfs - br.ret.sptk.many rp -END(__copy_user) -EXPORT_SYMBOL(__copy_user) diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c deleted file mode 100644 index 917e3138b277..000000000000 --- a/arch/ia64/lib/csum_partial_copy.c +++ /dev/null @@ -1,98 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Network Checksum & Copy routine - * - * Copyright (C) 1999, 2003-2004 Hewlett-Packard Co - * Stephane Eranian - * - * Most of the code has been imported from Linux/Alpha - */ - -#include -#include -#include - -#include - -/* - * XXX Fixme: those 2 inlines are meant for debugging and will go away - */ -static inline unsigned -short from64to16(unsigned long x) -{ - /* add up 32-bit words for 33 bits */ - x = (x & 0xffffffff) + (x >> 32); - /* add up 16-bit and 17-bit words for 17+c bits */ - x = (x & 0xffff) + (x >> 16); - /* add up 16-bit and 2-bit for 16+c bit */ - x = (x & 0xffff) + (x >> 16); - /* add up carry.. */ - x = (x & 0xffff) + (x >> 16); - return x; -} - -static inline -unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum) -{ - int odd, count; - unsigned long result = (unsigned long)psum; - - if (len <= 0) - goto out; - odd = 1 & (unsigned long) buff; - if (odd) { - result = *buff << 8; - len--; - buff++; - } - count = len >> 1; /* nr of 16-bit words.. */ - if (count) { - if (2 & (unsigned long) buff) { - result += *(unsigned short *) buff; - count--; - len -= 2; - buff += 2; - } - count >>= 1; /* nr of 32-bit words.. 
*/ - if (count) { - if (4 & (unsigned long) buff) { - result += *(unsigned int *) buff; - count--; - len -= 4; - buff += 4; - } - count >>= 1; /* nr of 64-bit words.. */ - if (count) { - unsigned long carry = 0; - do { - unsigned long w = *(unsigned long *) buff; - count--; - buff += 8; - result += carry; - result += w; - carry = (w > result); - } while (count); - result += carry; - result = (result & 0xffffffff) + (result >> 32); - } - if (len & 4) { - result += *(unsigned int *) buff; - buff += 4; - } - } - if (len & 2) { - result += *(unsigned short *) buff; - buff += 2; - } - } - if (len & 1) - result += *buff; - - result = from64to16(result); - - if (odd) - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); - -out: - return result; -} diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S deleted file mode 100644 index 6004dad2597c..000000000000 --- a/arch/ia64/lib/do_csum.S +++ /dev/null @@ -1,324 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optmized version of the standard do_csum() function - * - * Return: a 64bit quantity containing the 16bit Internet checksum - * - * Inputs: - * in0: address of buffer to checksum (char *) - * in1: length of the buffer (int) - * - * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co - * Stephane Eranian - * - * 02/04/22 Ken Chen - * Data locality study on the checksum buffer. - * More optimization cleanup - remove excessive stop bits. - * 02/04/08 David Mosberger - * More cleanup and tuning. - * 01/04/18 Jun Nakajima - * Clean up and optimize and the software pipeline, loading two - * back-to-back 8-byte words per loop. Clean up the initialization - * for the loop. Support the cases where load latency = 1 or 2. - * Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default). - */ - -#include - -// -// Theory of operations: -// The goal is to go as quickly as possible to the point where -// we can checksum 16 bytes/loop. Before reaching that point we must -// take care of incorrect alignment of first byte. 
-// -// The code hereafter also takes care of the "tail" part of the buffer -// before entering the core loop, if any. The checksum is a sum so it -// allows us to commute operations. So we do the "head" and "tail" -// first to finish at full speed in the body. Once we get the head and -// tail values, we feed them into the pipeline, very handy initialization. -// -// Of course we deal with the special case where the whole buffer fits -// into one 8 byte word. In this case we have only one entry in the pipeline. -// -// We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for -// possible load latency and also to accommodate for head and tail. -// -// The end of the function deals with folding the checksum from 64bits -// down to 16bits taking care of the carry. -// -// This version avoids synchronization in the core loop by also using a -// pipeline for the accumulation of the checksum in resultx[] (x=1,2). -// -// wordx[] (x=1,2) -// |---| -// | | 0 : new value loaded in pipeline -// |---| -// | | - : in transit data -// |---| -// | | LOAD_LATENCY : current value to add to checksum -// |---| -// | | LOAD_LATENCY+1 : previous value added to checksum -// |---| (previous iteration) -// -// resultx[] (x=1,2) -// |---| -// | | 0 : initial value -// |---| -// | | LOAD_LATENCY-1 : new checksum -// |---| -// | | LOAD_LATENCY : previous value of checksum -// |---| -// | | LOAD_LATENCY+1 : final checksum when out of the loop -// |---| -// -// -// See RFC1071 "Computing the Internet Checksum" for various techniques for -// calculating the Internet checksum. -// -// NOT YET DONE: -// - Maybe another algorithm which would take care of the folding at the -// end in a different manner -// - Work with people more knowledgeable than me on the network stack -// to figure out if we could not split the function depending on the -// type of packet or alignment we get. Like the ip_fast_csum() routine -// where we know we have at least 20bytes worth of data to checksum. 
-// - Do a better job of handling small packets. -// - Note on prefetching: it was found that under various load, i.e. ftp read/write, -// nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8% -// on the data that buffer points to (partly because the checksum is often preceded by -// a copy_from_user()). This finding indiate that lfetch will not be beneficial since -// the data is already in the cache. -// - -#define saved_pfs r11 -#define hmask r16 -#define tmask r17 -#define first1 r18 -#define firstval r19 -#define firstoff r20 -#define last r21 -#define lastval r22 -#define lastoff r23 -#define saved_lc r24 -#define saved_pr r25 -#define tmp1 r26 -#define tmp2 r27 -#define tmp3 r28 -#define carry1 r29 -#define carry2 r30 -#define first2 r31 - -#define buf in0 -#define len in1 - -#define LOAD_LATENCY 2 // XXX fix me - -#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2) -# error "Only 1 or 2 is supported/tested for LOAD_LATENCY." -#endif - -#define PIPE_DEPTH (LOAD_LATENCY+2) -#define ELD p[LOAD_LATENCY] // end of load -#define ELD_1 p[LOAD_LATENCY+1] // and next stage - -// unsigned long do_csum(unsigned char *buf,long len) - -GLOBAL_ENTRY(do_csum) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,2,16,0,16 - .rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2] - .rotp p[PIPE_DEPTH], pC1[2], pC2[2] - mov ret0=r0 // in case we have zero length - cmp.lt p0,p6=r0,len // check for zero length or negative (32bit len) - ;; - add tmp1=buf,len // last byte's address - .save pr, saved_pr - mov saved_pr=pr // preserve predicates (rotation) -(p6) br.ret.spnt.many rp // return if zero or negative length - - mov hmask=-1 // initialize head mask - tbit.nz p15,p0=buf,0 // is buf an odd address? 
- and first1=-8,buf // 8-byte align down address of first1 element - - and firstoff=7,buf // how many bytes off for first1 element - mov tmask=-1 // initialize tail mask - - ;; - adds tmp2=-1,tmp1 // last-1 - and lastoff=7,tmp1 // how many bytes off for last element - ;; - sub tmp1=8,lastoff // complement to lastoff - and last=-8,tmp2 // address of word containing last byte - ;; - sub tmp3=last,first1 // tmp3=distance from first1 to last - .save ar.lc, saved_lc - mov saved_lc=ar.lc // save lc - cmp.eq p8,p9=last,first1 // everything fits in one word ? - - ld8 firstval=[first1],8 // load, ahead of time, "first1" word - and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0 - shl tmp2=firstoff,3 // number of bits - ;; -(p9) ld8 lastval=[last] // load, ahead of time, "last" word, if needed - shl tmp1=tmp1,3 // number of bits -(p9) adds tmp3=-8,tmp3 // effectively loaded - ;; -(p8) mov lastval=r0 // we don't need lastval if first1==last - shl hmask=hmask,tmp2 // build head mask, mask off [0,first1off[ - shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff] - ;; - .body -#define count tmp3 - -(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only -(p9) and word2[0]=lastval,tmask // mask last it as appropriate - shr.u count=count,3 // how many 8-byte? - ;; - // If count is odd, finish this 8-byte word so that we can - // load two back-to-back 8-byte words per loop thereafter. - and word1[0]=firstval,hmask // and mask it as appropriate - tbit.nz p10,p11=count,0 // if (count is odd) - ;; -(p8) mov result1[0]=word1[0] -(p9) add result1[0]=word1[0],word2[0] - ;; - cmp.ltu p6,p0=result1[0],word1[0] // check the carry - cmp.eq.or.andcm p8,p0=0,count // exit if zero 8-byte - ;; -(p6) adds result1[0]=1,result1[0] -(p8) br.cond.dptk .do_csum_exit // if (within an 8-byte word) -(p11) br.cond.dptk .do_csum16 // if (count is even) - - // Here count is odd. 
- ld8 word1[1]=[first1],8 // load an 8-byte word - cmp.eq p9,p10=1,count // if (count == 1) - adds count=-1,count // loaded an 8-byte word - ;; - add result1[0]=result1[0],word1[1] - ;; - cmp.ltu p6,p0=result1[0],word1[1] - ;; -(p6) adds result1[0]=1,result1[0] -(p9) br.cond.sptk .do_csum_exit // if (count == 1) exit - // Fall through to calculate the checksum, feeding result1[0] as - // the initial value in result1[0]. - // - // Calculate the checksum loading two 8-byte words per loop. - // -.do_csum16: - add first2=8,first1 - shr.u count=count,1 // we do 16 bytes per loop - ;; - adds count=-1,count - mov carry1=r0 - mov carry2=r0 - brp.loop.imp 1f,2f - ;; - mov ar.ec=PIPE_DEPTH - mov ar.lc=count // set lc - mov pr.rot=1<<16 - // result1[0] must be initialized in advance. - mov result2[0]=r0 - ;; - .align 32 -1: -(ELD_1) cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1] -(pC1[1])adds carry1=1,carry1 -(ELD_1) cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1] -(pC2[1])adds carry2=1,carry2 -(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY] -(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY] -2: -(p[0]) ld8 word1[0]=[first1],16 -(p[0]) ld8 word2[0]=[first2],16 - br.ctop.sptk 1b - ;; - // Since len is a 32-bit value, carry cannot be larger than a 64-bit value. 
-(pC1[1])adds carry1=1,carry1 // since we miss the last one -(pC2[1])adds carry2=1,carry2 - ;; - add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1 - add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2 - ;; - cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1 - cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2 - ;; -(p6) adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1] -(p7) adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1] - ;; - add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1] - ;; - cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1] - ;; -(p6) adds result1[0]=1,result1[0] - ;; -.do_csum_exit: - // - // now fold 64 into 16 bits taking care of carry - // that's not very good because it has lots of sequentiality - // - mov tmp3=0xffff - zxt4 tmp1=result1[0] - shr.u tmp2=result1[0],32 - ;; - add result1[0]=tmp1,tmp2 - ;; - and tmp1=result1[0],tmp3 - shr.u tmp2=result1[0],16 - ;; - add result1[0]=tmp1,tmp2 - ;; - and tmp1=result1[0],tmp3 - shr.u tmp2=result1[0],16 - ;; - add result1[0]=tmp1,tmp2 - ;; - and tmp1=result1[0],tmp3 - shr.u tmp2=result1[0],16 - ;; - add ret0=tmp1,tmp2 - mov pr=saved_pr,0xffffffffffff0000 - ;; - // if buf was odd then swap bytes - mov ar.pfs=saved_pfs // restore ar.ec -(p15) mux1 ret0=ret0,@rev // reverse word - ;; - mov ar.lc=saved_lc -(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes - br.ret.sptk.many rp - -// I (Jun Nakajima) wrote an equivalent code (see below), but it was -// not much better than the original. So keep the original there so that -// someone else can challenge. 
-// -// shr.u word1[0]=result1[0],32 -// zxt4 result1[0]=result1[0] -// ;; -// add result1[0]=result1[0],word1[0] -// ;; -// zxt2 result2[0]=result1[0] -// extr.u word1[0]=result1[0],16,16 -// shr.u carry1=result1[0],32 -// ;; -// add result2[0]=result2[0],word1[0] -// ;; -// add result2[0]=result2[0],carry1 -// ;; -// extr.u ret0=result2[0],16,16 -// ;; -// add ret0=ret0,result2[0] -// ;; -// zxt2 ret0=ret0 -// mov ar.pfs=saved_pfs // restore ar.ec -// mov pr=saved_pr,0xffffffffffff0000 -// ;; -// // if buf was odd then swap bytes -// mov ar.lc=saved_lc -//(p15) mux1 ret0=ret0,@rev // reverse word -// ;; -//(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes -// br.ret.sptk.many rp - -END(do_csum) diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S deleted file mode 100644 index f8e795fe45cb..000000000000 --- a/arch/ia64/lib/flush.S +++ /dev/null @@ -1,119 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Cache flushing routines. - * - * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * - * 05/28/05 Zoltan Menyhart Dynamic stride size - */ - -#include -#include - - /* - * flush_icache_range(start,end) - * - * Make i-cache(s) coherent with d-caches. - * - * Must deal with range from start to end-1 but nothing else (need to - * be careful not to touch addresses that may be unmapped). - * - * Note: "in0" and "in1" are preserved for debugging purposes. 
- */ - .section .kprobes.text,"ax" -GLOBAL_ENTRY(flush_icache_range) - - .prologue - alloc r2=ar.pfs,2,0,0,0 - movl r3=ia64_i_cache_stride_shift - mov r21=1 - ;; - ld8 r20=[r3] // r20: stride shift - sub r22=in1,r0,1 // last byte address - ;; - shr.u r23=in0,r20 // start / (stride size) - shr.u r22=r22,r20 // (last byte address) / (stride size) - shl r21=r21,r20 // r21: stride size of the i-cache(s) - ;; - sub r8=r22,r23 // number of strides - 1 - shl r24=r23,r20 // r24: addresses for "fc.i" = - // "start" rounded down to stride boundary - .save ar.lc,r3 - mov r3=ar.lc // save ar.lc - ;; - - .body - mov ar.lc=r8 - ;; - /* - * 32 byte aligned loop, even number of (actually 2) bundles - */ -.Loop: fc.i r24 // issuable on M0 only - add r24=r21,r24 // we flush "stride size" bytes per iteration - nop.i 0 - br.cloop.sptk.few .Loop - ;; - sync.i - ;; - srlz.i - ;; - mov ar.lc=r3 // restore ar.lc - br.ret.sptk.many rp -END(flush_icache_range) -EXPORT_SYMBOL_GPL(flush_icache_range) - - /* - * clflush_cache_range(start,size) - * - * Flush cache lines from start to start+size-1. - * - * Must deal with range from start to start+size-1 but nothing else - * (need to be careful not to touch addresses that may be - * unmapped). - * - * Note: "in0" and "in1" are preserved for debugging purposes. 
- */ - .section .kprobes.text,"ax" -GLOBAL_ENTRY(clflush_cache_range) - - .prologue - alloc r2=ar.pfs,2,0,0,0 - movl r3=ia64_cache_stride_shift - mov r21=1 - add r22=in1,in0 - ;; - ld8 r20=[r3] // r20: stride shift - sub r22=r22,r0,1 // last byte address - ;; - shr.u r23=in0,r20 // start / (stride size) - shr.u r22=r22,r20 // (last byte address) / (stride size) - shl r21=r21,r20 // r21: stride size of the i-cache(s) - ;; - sub r8=r22,r23 // number of strides - 1 - shl r24=r23,r20 // r24: addresses for "fc" = - // "start" rounded down to stride - // boundary - .save ar.lc,r3 - mov r3=ar.lc // save ar.lc - ;; - - .body - mov ar.lc=r8 - ;; - /* - * 32 byte aligned loop, even number of (actually 2) bundles - */ -.Loop_fc: - fc r24 // issuable on M0 only - add r24=r21,r24 // we flush "stride size" bytes per iteration - nop.i 0 - br.cloop.sptk.few .Loop_fc - ;; - sync.i - ;; - srlz.i - ;; - mov ar.lc=r3 // restore ar.lc - br.ret.sptk.many rp -END(clflush_cache_range) diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S deleted file mode 100644 index 83586fbc51ff..000000000000 --- a/arch/ia64/lib/idiv32.S +++ /dev/null @@ -1,86 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2000 Hewlett-Packard Co - * Copyright (C) 2000 David Mosberger-Tang - * - * 32-bit integer division. - * - * This code is based on the application note entitled "Divide, Square Root - * and Remainder Algorithms for the IA-64 Architecture". 
This document - * is available as Intel document number 248725-002 or via the web at - * http://developer.intel.com/software/opensource/numerics/ - * - * For more details on the theory behind these algorithms, see "IA-64 - * and Elementary Functions" by Peter Markstein; HP Professional Books - * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) - */ - -#include -#include - -#ifdef MODULO -# define OP mod -#else -# define OP div -#endif - -#ifdef UNSIGNED -# define SGN u -# define EXTEND zxt4 -# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b -# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b -#else -# define SGN -# define EXTEND sxt4 -# define INT_TO_FP(a,b) fcvt.xf a=b -# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b -#endif - -#define PASTE1(a,b) a##b -#define PASTE(a,b) PASTE1(a,b) -#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3)) - -GLOBAL_ENTRY(NAME) - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias) - EXTEND in0 = in0 // in0 = a - EXTEND in1 = in1 // in1 = b - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 -#ifdef MODULO - sub in1 = r0, in1 // in1 = -b -#endif - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. 
- INT_TO_FP(f8, f8) - INT_TO_FP(f9, f9) - ;; - setf.exp f7 = r2 // f7 = 2^-34 - frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b) - ;; -(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0 -(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1 - ;; -#ifdef MODULO - setf.sig f9 = in1 // f9 = -b -#endif -(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0 -(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34 - ;; -#ifdef MODULO - setf.sig f7 = in0 -#endif -(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1 - ;; - FP_TO_INT(f6, f6) // q = trunc(q2) - ;; -#ifdef MODULO - xma.l f6 = f6, f9, f7 // r = q*(-b) + a - ;; -#endif - getf.sig r8 = f6 // transfer result to result register - br.ret.sptk.many rp -END(NAME) -EXPORT_SYMBOL(NAME) diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S deleted file mode 100644 index 5c9113691f72..000000000000 --- a/arch/ia64/lib/idiv64.S +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1999-2000 Hewlett-Packard Co - * Copyright (C) 1999-2000 David Mosberger-Tang - * - * 64-bit integer division. - * - * This code is based on the application note entitled "Divide, Square Root - * and Remainder Algorithms for the IA-64 Architecture". 
This document - * is available as Intel document number 248725-002 or via the web at - * http://developer.intel.com/software/opensource/numerics/ - * - * For more details on the theory behind these algorithms, see "IA-64 - * and Elementary Functions" by Peter Markstein; HP Professional Books - * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) - */ - -#include -#include - -#ifdef MODULO -# define OP mod -#else -# define OP div -#endif - -#ifdef UNSIGNED -# define SGN u -# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b -# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b -#else -# define SGN -# define INT_TO_FP(a,b) fcvt.xf a=b -# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b -#endif - -#define PASTE1(a,b) a##b -#define PASTE(a,b) PASTE1(a,b) -#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3)) - -GLOBAL_ENTRY(NAME) - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. - INT_TO_FP(f8, f8) - INT_TO_FP(f9, f9) - ;; - frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b) - ;; -(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0 -(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1 - ;; -(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0 -(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0 - ;; -#ifdef MODULO - sub in1 = r0, in1 // in1 = -b -#endif -(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1 -(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0 - ;; -(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1 -(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a - ;; -#ifdef MODULO - setf.sig f8 = in0 // f8 = a - setf.sig f9 = in1 // f9 = -b -#endif -(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2 - ;; - FP_TO_INT(f11, f11) // q = trunc(q3) - ;; -#ifdef MODULO - xma.l f11 = f11, f9, f8 // r = q*(-b) + a - ;; -#endif - getf.sig r8 = f11 // transfer result to result register - br.ret.sptk.many rp -END(NAME) -EXPORT_SYMBOL(NAME) diff --git a/arch/ia64/lib/io.c b/arch/ia64/lib/io.c deleted file mode 100644 index 
c3e02462ed16..000000000000 --- a/arch/ia64/lib/io.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include - -#include - -/* - * Copy data from IO memory space to "real" memory space. - * This needs to be optimized. - */ -void memcpy_fromio(void *to, const volatile void __iomem *from, long count) -{ - char *dst = to; - - while (count) { - count--; - *dst++ = readb(from++); - } -} -EXPORT_SYMBOL(memcpy_fromio); - -/* - * Copy data from "real" memory space to IO memory space. - * This needs to be optimized. - */ -void memcpy_toio(volatile void __iomem *to, const void *from, long count) -{ - const char *src = from; - - while (count) { - count--; - writeb(*src++, to++); - } -} -EXPORT_SYMBOL(memcpy_toio); - -/* - * "memset" on IO memory space. - * This needs to be optimized. - */ -void memset_io(volatile void __iomem *dst, int c, long count) -{ - unsigned char ch = (char)(c & 0xff); - - while (count) { - count--; - writeb(ch, dst); - dst++; - } -} -EXPORT_SYMBOL(memset_io); diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S deleted file mode 100644 index fcc0b812ce2e..000000000000 --- a/arch/ia64/lib/ip_fast_csum.S +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Optmized version of the ip_fast_csum() function - * Used for calculating IP header checksum - * - * Return: 16bit checksum, complemented - * - * Inputs: - * in0: address of buffer to checksum (char *) - * in1: length of the buffer (int) - * - * Copyright (C) 2002, 2006 Intel Corp. - * Copyright (C) 2002, 2006 Ken Chen - */ - -#include -#include - -/* - * Since we know that most likely this function is called with buf aligned - * on 4-byte boundary and 20 bytes in length, we can execution rather quickly - * versus calling generic version of do_csum, which has lots of overhead in - * handling various alignments and sizes. 
However, due to lack of constrains - * put on the function input argument, cases with alignment not on 4-byte or - * size not equal to 20 bytes will be handled by the generic do_csum function. - */ - -#define in0 r32 -#define in1 r33 -#define in2 r34 -#define in3 r35 -#define in4 r36 -#define ret0 r8 - -GLOBAL_ENTRY(ip_fast_csum) - .prologue - .body - cmp.ne p6,p7=5,in1 // size other than 20 byte? - and r14=3,in0 // is it aligned on 4-byte? - add r15=4,in0 // second source pointer - ;; - cmp.ne.or.andcm p6,p7=r14,r0 - ;; -(p7) ld4 r20=[in0],8 -(p7) ld4 r21=[r15],8 -(p6) br.spnt .generic - ;; - ld4 r22=[in0],8 - ld4 r23=[r15],8 - ;; - ld4 r24=[in0] - add r20=r20,r21 - add r22=r22,r23 - ;; - add r20=r20,r22 - ;; - add r20=r20,r24 - ;; - shr.u ret0=r20,16 // now need to add the carry - zxt2 r20=r20 - ;; - add r20=ret0,r20 - ;; - shr.u ret0=r20,16 // add carry again - zxt2 r20=r20 - ;; - add r20=ret0,r20 - ;; - shr.u ret0=r20,16 - zxt2 r20=r20 - ;; - add r20=ret0,r20 - mov r9=0xffff - ;; - andcm ret0=r9,r20 - .restore sp // reset frame state - br.ret.sptk.many b0 - ;; - -.generic: - .prologue - .save ar.pfs, r35 - alloc r35=ar.pfs,2,2,2,0 - .save rp, r34 - mov r34=b0 - .body - dep.z out1=in1,2,30 - mov out0=in0 - ;; - br.call.sptk.many b0=do_csum - ;; - andcm ret0=-1,ret0 - mov ar.pfs=r35 - mov b0=r34 - br.ret.sptk.many b0 -END(ip_fast_csum) -EXPORT_SYMBOL(ip_fast_csum) - -GLOBAL_ENTRY(csum_ipv6_magic) - ld4 r20=[in0],4 - ld4 r21=[in1],4 - zxt4 in2=in2 - ;; - ld4 r22=[in0],4 - ld4 r23=[in1],4 - dep r15=in3,in2,32,16 - ;; - ld4 r24=[in0],4 - ld4 r25=[in1],4 - mux1 r15=r15,@rev - add r16=r20,r21 - add r17=r22,r23 - zxt4 in4=in4 - ;; - ld4 r26=[in0],4 - ld4 r27=[in1],4 - shr.u r15=r15,16 - add r18=r24,r25 - add r8=r16,r17 - ;; - add r19=r26,r27 - add r8=r8,r18 - ;; - add r8=r8,r19 - add r15=r15,in4 - ;; - add r8=r8,r15 - ;; - shr.u r10=r8,32 // now fold sum into short - zxt4 r11=r8 - ;; - add r8=r10,r11 - ;; - shr.u r10=r8,16 // yeah, keep it rolling - zxt2 r11=r8 - ;; - 
add r8=r10,r11 - ;; - shr.u r10=r8,16 // three times lucky - zxt2 r11=r8 - ;; - add r8=r10,r11 - mov r9=0xffff - ;; - andcm r8=r9,r8 - br.ret.sptk.many b0 -END(csum_ipv6_magic) -EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S deleted file mode 100644 index 35c9069a8345..000000000000 --- a/arch/ia64/lib/memcpy.S +++ /dev/null @@ -1,304 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the standard memcpy() function - * - * Inputs: - * in0: destination address - * in1: source address - * in2: number of bytes to copy - * Output: - * no return value - * - * Copyright (C) 2000-2001 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger-Tang - */ -#include -#include - -GLOBAL_ENTRY(memcpy) - -# define MEM_LAT 21 /* latency to memory */ - -# define dst r2 -# define src r3 -# define retval r8 -# define saved_pfs r9 -# define saved_lc r10 -# define saved_pr r11 -# define cnt r16 -# define src2 r17 -# define t0 r18 -# define t1 r19 -# define t2 r20 -# define t3 r21 -# define t4 r22 -# define src_end r23 - -# define N (MEM_LAT + 4) -# define Nrot ((N + 7) & ~7) - - /* - * First, check if everything (src, dst, len) is a multiple of eight. If - * so, we handle everything with no taken branches (other than the loop - * itself) and a small icache footprint. Otherwise, we jump off to - * the more general copy routine handling arbitrary - * sizes/alignment etc. - */ - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot - .save ar.lc, saved_lc - mov saved_lc=ar.lc - or t0=in0,in1 - ;; - - or t0=t0,in2 - .save pr, saved_pr - mov saved_pr=pr - - .body - - cmp.eq p6,p0=in2,r0 // zero length? 
- mov retval=in0 // return dst -(p6) br.ret.spnt.many rp // zero length, return immediately - ;; - - mov dst=in0 // copy because of rotation - shr.u cnt=in2,3 // number of 8-byte words to copy - mov pr.rot=1<<16 - ;; - - adds cnt=-1,cnt // br.ctop is repeat/until - cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? - mov ar.ec=N - ;; - - and t0=0x7,t0 - mov ar.lc=cnt - ;; - cmp.ne p6,p0=t0,r0 - - mov src=in1 // copy because of rotation -(p7) br.cond.spnt.few .memcpy_short -(p6) br.cond.spnt.few .memcpy_long - ;; - nop.m 0 - ;; - nop.m 0 - nop.i 0 - ;; - nop.m 0 - ;; - .rotr val[N] - .rotp p[N] - .align 32 -1: { .mib -(p[0]) ld8 val[0]=[src],8 - nop.i 0 - brp.loop.imp 1b, 2f -} -2: { .mfb -(p[N-1])st8 [dst]=val[N-1],8 - nop.f 0 - br.ctop.dptk.few 1b -} - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - /* - * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time - * copy loop. This performs relatively poorly on Itanium, but it doesn't - * get used very often (gcc inlines small copies) and due to atomicity - * issues, we want to avoid read-modify-write of entire words. - */ - .align 32 -.memcpy_short: - adds cnt=-1,in2 // br.ctop is repeat/until - mov ar.ec=MEM_LAT - brp.loop.imp 1f, 2f - ;; - mov ar.lc=cnt - ;; - nop.m 0 - ;; - nop.m 0 - nop.i 0 - ;; - nop.m 0 - ;; - nop.m 0 - ;; - /* - * It is faster to put a stop bit in the loop here because it makes - * the pipeline shorter (and latency is what matters on short copies). - */ - .align 32 -1: { .mib -(p[0]) ld1 val[0]=[src],1 - nop.i 0 - brp.loop.imp 1b, 2f -} ;; -2: { .mfb -(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 - nop.f 0 - br.ctop.dptk.few 1b -} ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - /* - * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't - * an overriding concern here, but throughput is. 
We first do - * sub-word copying until the destination is aligned, then we check - * if the source is also aligned. If so, we do a simple load/store-loop - * until there are less than 8 bytes left over and then we do the tail, - * by storing the last few bytes using sub-word copying. If the source - * is not aligned, we branch off to the non-congruent loop. - * - * stage: op: - * 0 ld - * : - * MEM_LAT+3 shrp - * MEM_LAT+4 st - * - * On Itanium, the pipeline itself runs without stalls. However, br.ctop - * seems to introduce an unavoidable bubble in the pipeline so the overall - * latency is 2 cycles/iteration. This gives us a _copy_ throughput - * of 4 byte/cycle. Still not bad. - */ -# undef N -# undef Nrot -# define N (MEM_LAT + 5) /* number of stages */ -# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */ - -#define LOG_LOOP_SIZE 6 - -.memcpy_long: - alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame - and t0=-8,src // t0 = src & ~7 - and t2=7,src // t2 = src & 7 - ;; - ld8 t0=[t0] // t0 = 1st source word - adds src2=7,src // src2 = (src + 7) - sub t4=r0,dst // t4 = -dst - ;; - and src2=-8,src2 // src2 = (src + 7) & ~7 - shl t2=t2,3 // t2 = 8*(src & 7) - shl t4=t4,3 // t4 = 8*(dst & 7) - ;; - ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise - sub t3=64,t2 // t3 = 64-8*(src & 7) - shr.u t0=t0,t2 - ;; - add src_end=src,in2 - shl t1=t1,t3 - mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7) - ;; - or t0=t0,t1 - mov cnt=r0 - adds src_end=-1,src_end - ;; -(p3) st1 [dst]=t0,1 -(p3) shr.u t0=t0,8 -(p3) adds cnt=1,cnt - ;; -(p4) st2 [dst]=t0,2 -(p4) shr.u t0=t0,16 -(p4) adds cnt=2,cnt - ;; -(p5) st4 [dst]=t0,4 -(p5) adds cnt=4,cnt - and src_end=-8,src_end // src_end = last word of source buffer - ;; - - // At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy: - -1:{ add src=cnt,src // make src point to remainder of source buffer - sub cnt=in2,cnt // cnt = number of bytes left to copy - mov t4=ip - } ;; 
- and src2=-8,src // align source pointer - adds t4=.memcpy_loops-1b,t4 - mov ar.ec=N - - and t0=7,src // t0 = src & 7 - shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy - shl cnt=cnt,3 // move bits 0-2 to 3-5 - ;; - - .rotr val[N+1], w[2] - .rotp p[N] - - cmp.ne p6,p0=t0,r0 // is src aligned, too? - shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7) - adds t2=-1,t2 // br.ctop is repeat/until - ;; - add t4=t0,t4 - mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy - mov ar.lc=t2 - ;; - nop.m 0 - ;; - nop.m 0 - nop.i 0 - ;; - nop.m 0 - ;; -(p6) ld8 val[1]=[src2],8 // prime the pump... - mov b6=t4 - br.sptk.few b6 - ;; - -.memcpy_tail: - // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is - // less than 8) and t0 contains the last few bytes of the src buffer: -(p5) st4 [dst]=t0,4 -(p5) shr.u t0=t0,32 - mov ar.lc=saved_lc - ;; -(p4) st2 [dst]=t0,2 -(p4) shr.u t0=t0,16 - mov ar.pfs=saved_pfs - ;; -(p3) st1 [dst]=t0 - mov pr=saved_pr,-1 - br.ret.sptk.many rp - -/////////////////////////////////////////////////////// - .align 64 - -#define COPY(shift,index) \ - 1: { .mib \ - (p[0]) ld8 val[0]=[src2],8; \ - (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ - brp.loop.imp 1b, 2f \ - }; \ - 2: { .mfb \ - (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ - nop.f 0; \ - br.ctop.dptk.few 1b; \ - }; \ - ;; \ - ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ - ;; \ - shrp t0=val[N-1],val[N-index],shift; \ - br .memcpy_tail -.memcpy_loops: - COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */ - COPY(8, 0) - COPY(16, 0) - COPY(24, 0) - COPY(32, 0) - COPY(40, 0) - COPY(48, 0) - COPY(56, 0) - -END(memcpy) -EXPORT_SYMBOL(memcpy) diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S deleted file mode 100644 index c0d4362217ae..000000000000 --- a/arch/ia64/lib/memcpy_mck.S +++ /dev/null @@ -1,659 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * 
Itanium 2-optimized version of memcpy and copy_user function - * - * Inputs: - * in0: destination address - * in1: source address - * in2: number of bytes to copy - * Output: - * for memcpy: return dest - * for copy_user: return 0 if success, - * or number of byte NOT copied if error occurred. - * - * Copyright (C) 2002 Intel Corp. - * Copyright (C) 2002 Ken Chen - */ -#include -#include -#include - -#define EK(y...) EX(y) - -/* McKinley specific optimization */ - -#define retval r8 -#define saved_pfs r31 -#define saved_lc r10 -#define saved_pr r11 -#define saved_in0 r14 -#define saved_in1 r15 -#define saved_in2 r16 - -#define src0 r2 -#define src1 r3 -#define dst0 r17 -#define dst1 r18 -#define cnt r9 - -/* r19-r30 are temp for each code section */ -#define PREFETCH_DIST 8 -#define src_pre_mem r19 -#define dst_pre_mem r20 -#define src_pre_l2 r21 -#define dst_pre_l2 r22 -#define t1 r23 -#define t2 r24 -#define t3 r25 -#define t4 r26 -#define t5 t1 // alias! -#define t6 t2 // alias! -#define t7 t3 // alias! -#define n8 r27 -#define t9 t5 // alias! -#define t10 t4 // alias! -#define t11 t7 // alias! -#define t12 t6 // alias! -#define t14 t10 // alias! 
-#define t13 r28 -#define t15 r29 -#define tmp r30 - -/* defines for long_copy block */ -#define A 0 -#define B (PREFETCH_DIST) -#define C (B + PREFETCH_DIST) -#define D (C + 1) -#define N (D + 1) -#define Nrot ((N + 7) & ~7) - -/* alias */ -#define in0 r32 -#define in1 r33 -#define in2 r34 - -GLOBAL_ENTRY(memcpy) - and r28=0x7,in0 - and r29=0x7,in1 - mov f6=f0 - mov retval=in0 - br.cond.sptk .common_code - ;; -END(memcpy) -EXPORT_SYMBOL(memcpy) -GLOBAL_ENTRY(__copy_user) - .prologue -// check dest alignment - and r28=0x7,in0 - and r29=0x7,in1 - mov f6=f1 - mov saved_in0=in0 // save dest pointer - mov saved_in1=in1 // save src pointer - mov retval=r0 // initialize return value - ;; -.common_code: - cmp.gt p15,p0=8,in2 // check for small size - cmp.ne p13,p0=0,r28 // check dest alignment - cmp.ne p14,p0=0,r29 // check src alignment - add src0=0,in1 - sub r30=8,r28 // for .align_dest - mov saved_in2=in2 // save len - ;; - add dst0=0,in0 - add dst1=1,in0 // dest odd index - cmp.le p6,p0 = 1,r30 // for .align_dest -(p15) br.cond.dpnt .memcpy_short -(p13) br.cond.dpnt .align_dest -(p14) br.cond.dpnt .unaligned_src - ;; - -// both dest and src are aligned on 8-byte boundary -.aligned_src: - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot - .save pr, saved_pr - mov saved_pr=pr - - shr.u cnt=in2,7 // this much cache line - ;; - cmp.lt p6,p0=2*PREFETCH_DIST,cnt - cmp.lt p7,p8=1,cnt - .save ar.lc, saved_lc - mov saved_lc=ar.lc - .body - add cnt=-1,cnt - add src_pre_mem=0,in1 // prefetch src pointer - add dst_pre_mem=0,in0 // prefetch dest pointer - ;; -(p7) mov ar.lc=cnt // prefetch count -(p8) mov ar.lc=r0 -(p6) br.cond.dpnt .long_copy - ;; - -.prefetch: - lfetch.fault [src_pre_mem], 128 - lfetch.fault.excl [dst_pre_mem], 128 - br.cloop.dptk.few .prefetch - ;; - -.medium_copy: - and tmp=31,in2 // copy length after iteration - shr.u r29=in2,5 // number of 32-byte iteration - add dst1=8,dst0 // 2nd dest pointer - ;; - add cnt=-1,r29 // ctop iteration 
adjustment - cmp.eq p10,p0=r29,r0 // do we really need to loop? - add src1=8,src0 // 2nd src pointer - cmp.le p6,p0=8,tmp - ;; - cmp.le p7,p0=16,tmp - mov ar.lc=cnt // loop setup - cmp.eq p16,p17 = r0,r0 - mov ar.ec=2 -(p10) br.dpnt.few .aligned_src_tail - ;; - TEXT_ALIGN(32) -1: -EX(.ex_handler, (p16) ld8 r34=[src0],16) -EK(.ex_handler, (p16) ld8 r38=[src1],16) -EX(.ex_handler, (p17) st8 [dst0]=r33,16) -EK(.ex_handler, (p17) st8 [dst1]=r37,16) - ;; -EX(.ex_handler, (p16) ld8 r32=[src0],16) -EK(.ex_handler, (p16) ld8 r36=[src1],16) -EX(.ex_handler, (p16) st8 [dst0]=r34,16) -EK(.ex_handler, (p16) st8 [dst1]=r38,16) - br.ctop.dptk.few 1b - ;; - -.aligned_src_tail: -EX(.ex_handler, (p6) ld8 t1=[src0]) - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs -EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8) - cmp.le p8,p0=24,tmp - and r21=-8,tmp - ;; -EX(.ex_hndlr_s, (p8) ld8 t3=[src1]) -EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1 - and in2=7,tmp // remaining length -EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2 - add src0=src0,r21 // setting up src pointer - add dst0=dst0,r21 // setting up dest pointer - ;; -EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3 - mov pr=saved_pr,-1 - br.dptk.many .memcpy_short - ;; - -/* code taken from copy_page_mck */ -.long_copy: - .rotr v[2*PREFETCH_DIST] - .rotp p[N] - - mov src_pre_mem = src0 - mov pr.rot = 0x10000 - mov ar.ec = 1 // special unrolled loop - - mov dst_pre_mem = dst0 - - add src_pre_l2 = 8*8, src0 - add dst_pre_l2 = 8*8, dst0 - ;; - add src0 = 8, src_pre_mem // first t1 src - mov ar.lc = 2*PREFETCH_DIST - 1 - shr.u cnt=in2,7 // number of lines - add src1 = 3*8, src_pre_mem // first t3 src - add dst0 = 8, dst_pre_mem // first t1 dst - add dst1 = 3*8, dst_pre_mem // first t3 dst - ;; - and tmp=127,in2 // remaining bytes after this block - add cnt = -(2*PREFETCH_DIST) - 1, cnt - // same as .line_copy loop, but with all predicated-off instructions removed: -.prefetch_loop: -EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 
128) // M0 -EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 - br.ctop.sptk .prefetch_loop - ;; - cmp.eq p16, p0 = r0, r0 // reset p16 to 1 - mov ar.lc = cnt - mov ar.ec = N // # of stages in pipeline - ;; -.line_copy: -EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0 -EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1 -EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory -EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2 - ;; -EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory -EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2 -EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2 -EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3 - ;; -EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8) -EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8) -EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8) -EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8) -EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8) - ;; -EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8) -EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8) -EK(.ex_handler, (p[D]) ld8 t15 = [src1], 4*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8) -EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8) - ;; -EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8) -EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) - br.ctop.sptk .line_copy - ;; - - add dst0=-8,dst0 - add src0=-8,src0 - mov 
in2=tmp - .restore sp - br.sptk.many .medium_copy - ;; - -#define BLOCK_SIZE 128*32 -#define blocksize r23 -#define curlen r24 - -// dest is on 8-byte boundary, src is not. We need to do -// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle. -.unaligned_src: - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,5,0,8 - .save ar.lc, saved_lc - mov saved_lc=ar.lc - .save pr, saved_pr - mov saved_pr=pr - .body -.4k_block: - mov saved_in0=dst0 // need to save all input arguments - mov saved_in2=in2 - mov blocksize=BLOCK_SIZE - ;; - cmp.lt p6,p7=blocksize,in2 - mov saved_in1=src0 - ;; -(p6) mov in2=blocksize - ;; - shr.u r21=in2,7 // this much cache line - shr.u r22=in2,4 // number of 16-byte iteration - and curlen=15,in2 // copy length after iteration - and r30=7,src0 // source alignment - ;; - cmp.lt p7,p8=1,r21 - add cnt=-1,r21 - ;; - - add src_pre_mem=0,src0 // prefetch src pointer - add dst_pre_mem=0,dst0 // prefetch dest pointer - and src0=-8,src0 // 1st src pointer -(p7) mov ar.lc = cnt -(p8) mov ar.lc = r0 - ;; - TEXT_ALIGN(32) -1: lfetch.fault [src_pre_mem], 128 - lfetch.fault.excl [dst_pre_mem], 128 - br.cloop.dptk.few 1b - ;; - - shladd dst1=r22,3,dst0 // 2nd dest pointer - shladd src1=r22,3,src0 // 2nd src pointer - cmp.eq p8,p9=r22,r0 // do we really need to loop? - cmp.le p6,p7=8,curlen; // have at least 8 byte remaining? - add cnt=-1,r22 // ctop iteration adjustment - ;; -EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer -EK(.ex_handler, (p9) ld8 r37=[src1],8) -(p8) br.dpnt.few .noloop - ;; - -// The jump address is calculated based on src alignment. The COPYU -// macro below need to confine its size to power of two, so an entry -// can be caulated using shl instead of an expensive multiply. The -// size is then hard coded by the following #define to match the -// actual size. This make it somewhat tedious when COPYU macro gets -// changed and this need to be adjusted to match. 
-#define LOOP_SIZE 6 -1: - mov r29=ip // jmp_table thread - mov ar.lc=cnt - ;; - add r29=.jump_table - 1b - (.jmp1-.jump_table), r29 - shl r28=r30, LOOP_SIZE // jmp_table thread - mov ar.ec=2 // loop setup - ;; - add r29=r29,r28 // jmp_table thread - cmp.eq p16,p17=r0,r0 - ;; - mov b6=r29 // jmp_table thread - ;; - br.cond.sptk.few b6 - -// for 8-15 byte case -// We will skip the loop, but need to replicate the side effect -// that the loop produces. -.noloop: -EX(.ex_handler, (p6) ld8 r37=[src1],8) - add src0=8,src0 -(p6) shl r25=r30,3 - ;; -EX(.ex_handler, (p6) ld8 r27=[src1]) -(p6) shr.u r28=r37,r25 -(p6) sub r26=64,r25 - ;; -(p6) shl r27=r27,r26 - ;; -(p6) or r21=r28,r27 - -.unaligned_src_tail: -/* check if we have more than blocksize to copy, if so go back */ - cmp.gt p8,p0=saved_in2,blocksize - ;; -(p8) add dst0=saved_in0,blocksize -(p8) add src0=saved_in1,blocksize -(p8) sub in2=saved_in2,blocksize -(p8) br.dpnt .4k_block - ;; - -/* we have up to 15 byte to copy in the tail. - * part of work is already done in the jump table code - * we are at the following state. - * src side: - * - * xxxxxx xx <----- r21 has xxxxxxxx already - * -------- -------- -------- - * 0 8 16 - * ^ - * | - * src1 - * - * dst - * -------- -------- -------- - * ^ - * | - * dst1 - */ -EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy -(p6) add curlen=-8,curlen // update length - mov ar.pfs=saved_pfs - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov in2=curlen // remaining length - mov dst0=dst1 // dest pointer - add src0=src1,r30 // forward by src alignment - ;; - -// 7 byte or smaller. 
-.memcpy_short: - cmp.le p8,p9 = 1,in2 - cmp.le p10,p11 = 2,in2 - cmp.le p12,p13 = 3,in2 - cmp.le p14,p15 = 4,in2 - add src1=1,src0 // second src pointer - add dst1=1,dst0 // second dest pointer - ;; - -EX(.ex_handler_short, (p8) ld1 t1=[src0],2) -EK(.ex_handler_short, (p10) ld1 t2=[src1],2) -(p9) br.ret.dpnt rp // 0 byte copy - ;; - -EX(.ex_handler_short, (p8) st1 [dst0]=t1,2) -EK(.ex_handler_short, (p10) st1 [dst1]=t2,2) -(p11) br.ret.dpnt rp // 1 byte copy - -EX(.ex_handler_short, (p12) ld1 t3=[src0],2) -EK(.ex_handler_short, (p14) ld1 t4=[src1],2) -(p13) br.ret.dpnt rp // 2 byte copy - ;; - - cmp.le p6,p7 = 5,in2 - cmp.le p8,p9 = 6,in2 - cmp.le p10,p11 = 7,in2 - -EX(.ex_handler_short, (p12) st1 [dst0]=t3,2) -EK(.ex_handler_short, (p14) st1 [dst1]=t4,2) -(p15) br.ret.dpnt rp // 3 byte copy - ;; - -EX(.ex_handler_short, (p6) ld1 t5=[src0],2) -EK(.ex_handler_short, (p8) ld1 t6=[src1],2) -(p7) br.ret.dpnt rp // 4 byte copy - ;; - -EX(.ex_handler_short, (p6) st1 [dst0]=t5,2) -EK(.ex_handler_short, (p8) st1 [dst1]=t6,2) -(p9) br.ret.dptk rp // 5 byte copy - -EX(.ex_handler_short, (p10) ld1 t7=[src0],2) -(p11) br.ret.dptk rp // 6 byte copy - ;; - -EX(.ex_handler_short, (p10) st1 [dst0]=t7,2) - br.ret.dptk rp // done all cases - - -/* Align dest to nearest 8-byte boundary. We know we have at - * least 7 bytes to copy, enough to crawl to 8-byte boundary. - * Actual number of byte to crawl depend on the dest alignment. 
- * 7 byte or less is taken care at .memcpy_short - - * src0 - source even index - * src1 - source odd index - * dst0 - dest even index - * dst1 - dest odd index - * r30 - distance to 8-byte boundary - */ - -.align_dest: - add src1=1,in1 // source odd index - cmp.le p7,p0 = 2,r30 // for .align_dest - cmp.le p8,p0 = 3,r30 // for .align_dest -EX(.ex_handler_short, (p6) ld1 t1=[src0],2) - cmp.le p9,p0 = 4,r30 // for .align_dest - cmp.le p10,p0 = 5,r30 - ;; -EX(.ex_handler_short, (p7) ld1 t2=[src1],2) -EK(.ex_handler_short, (p8) ld1 t3=[src0],2) - cmp.le p11,p0 = 6,r30 -EX(.ex_handler_short, (p6) st1 [dst0] = t1,2) - cmp.le p12,p0 = 7,r30 - ;; -EX(.ex_handler_short, (p9) ld1 t4=[src1],2) -EK(.ex_handler_short, (p10) ld1 t5=[src0],2) -EX(.ex_handler_short, (p7) st1 [dst1] = t2,2) -EK(.ex_handler_short, (p8) st1 [dst0] = t3,2) - ;; -EX(.ex_handler_short, (p11) ld1 t6=[src1],2) -EK(.ex_handler_short, (p12) ld1 t7=[src0],2) - cmp.eq p6,p7=r28,r29 -EX(.ex_handler_short, (p9) st1 [dst1] = t4,2) -EK(.ex_handler_short, (p10) st1 [dst0] = t5,2) - sub in2=in2,r30 - ;; -EX(.ex_handler_short, (p11) st1 [dst1] = t6,2) -EK(.ex_handler_short, (p12) st1 [dst0] = t7) - add dst0=in0,r30 // setup arguments - add src0=in1,r30 -(p6) br.cond.dptk .aligned_src -(p7) br.cond.dpnt .unaligned_src - ;; - -/* main loop body in jump table format */ -#define COPYU(shift) \ -1: \ -EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \ -EK(.ex_handler, (p16) ld8 r36=[src1],8); \ - (p17) shrp r35=r33,r34,shift;; /* 1 */ \ -EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \ - nop.m 0; \ - (p16) shrp r38=r36,r37,shift; \ -EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \ -EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ - br.ctop.dptk.few 1b;; \ - (p7) add src1=-8,src1; /* back out for <8 byte case */ \ - shrp r21=r22,r38,shift; /* speculative work */ \ - br.sptk.few .unaligned_src_tail /* branch out of jump table */ \ - ;; - TEXT_ALIGN(32) -.jump_table: - COPYU(8) // unaligned cases 
-.jmp1: - COPYU(16) - COPYU(24) - COPYU(32) - COPYU(40) - COPYU(48) - COPYU(56) - -#undef A -#undef B -#undef C -#undef D - -/* - * Due to lack of local tag support in gcc 2.x assembler, it is not clear which - * instruction failed in the bundle. The exception algorithm is that we - * first figure out the faulting address, then detect if there is any - * progress made on the copy, if so, redo the copy from last known copied - * location up to the faulting address (exclusive). In the copy_from_user - * case, remaining byte in kernel buffer will be zeroed. - * - * Take copy_from_user as an example, in the code there are multiple loads - * in a bundle and those multiple loads could span over two pages, the - * faulting address is calculated as page_round_down(max(src0, src1)). - * This is based on knowledge that if we can access one byte in a page, we - * can access any byte in that page. - * - * predicate used in the exception handler: - * p6-p7: direction - * p10-p11: src faulting addr calculation - * p12-p13: dst faulting addr calculation - */ - -#define A r19 -#define B r20 -#define C r21 -#define D r22 -#define F r28 - -#define saved_retval loc0 -#define saved_rtlink loc1 -#define saved_pfs_stack loc2 - -.ex_hndlr_s: - add src0=8,src0 - br.sptk .ex_handler - ;; -.ex_hndlr_d: - add dst0=8,dst0 - br.sptk .ex_handler - ;; -.ex_hndlr_lcpy_1: - mov src1=src_pre_mem - mov dst1=dst_pre_mem - cmp.gtu p10,p11=src_pre_mem,saved_in1 - cmp.gtu p12,p13=dst_pre_mem,saved_in0 - ;; -(p10) add src0=8,saved_in1 -(p11) mov src0=saved_in1 -(p12) add dst0=8,saved_in0 -(p13) mov dst0=saved_in0 - br.sptk .ex_handler -.ex_handler_lcpy: - // in line_copy block, the preload addresses should always ahead - // of the other two src/dst pointers. Furthermore, src1/dst1 should - // always ahead of src0/dst0. 
- mov src1=src_pre_mem - mov dst1=dst_pre_mem -.ex_handler: - mov pr=saved_pr,-1 // first restore pr, lc, and pfs - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - ;; -.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs - cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction - cmp.ltu p10,p11=src0,src1 - cmp.ltu p12,p13=dst0,dst1 - fcmp.eq p8,p0=f6,f0 // is it memcpy? - mov tmp = dst0 - ;; -(p11) mov src1 = src0 // pick the larger of the two -(p13) mov dst0 = dst1 // make dst0 the smaller one -(p13) mov dst1 = tmp // and dst1 the larger one - ;; -(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary -(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary - ;; -(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store -(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load - mov retval=saved_in2 -(p8) ld1 tmp=[src1] // force an oops for memcpy call -(p8) st1 [dst1]=r0 // force an oops for memcpy call -(p14) br.ret.sptk.many rp - -/* - * The remaining byte to copy is calculated as: - * - * A = (faulting_addr - orig_src) -> len to faulting ld address - * or - * (faulting_addr - orig_dst) -> len to faulting st address - * B = (cur_dst - orig_dst) -> len copied so far - * C = A - B -> len need to be copied - * D = orig_len - A -> len need to be left along - */ -(p6) sub A = F, saved_in0 -(p7) sub A = F, saved_in1 - clrrrb - ;; - alloc saved_pfs_stack=ar.pfs,3,3,3,0 - cmp.lt p8,p0=A,r0 - sub B = dst0, saved_in0 // how many byte copied so far - ;; -(p8) mov A = 0; // A shouldn't be negative, cap it - ;; - sub C = A, B - sub D = saved_in2, A - ;; - cmp.gt p8,p0=C,r0 // more than 1 byte? 
- mov r8=0 - mov saved_retval = D - mov saved_rtlink = b0 - - add out0=saved_in0, B - add out1=saved_in1, B - mov out2=C -(p8) br.call.sptk.few b0=__copy_user // recursive call - ;; - - add saved_retval=saved_retval,r8 // above might return non-zero value - ;; - - mov retval=saved_retval - mov ar.pfs=saved_pfs_stack - mov b0=saved_rtlink - br.ret.sptk.many rp - -/* end of McKinley specific optimization */ -END(__copy_user) -EXPORT_SYMBOL(__copy_user) diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S deleted file mode 100644 index 552c5c7e4d06..000000000000 --- a/arch/ia64/lib/memset.S +++ /dev/null @@ -1,365 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Optimized version of the standard memset() function. - - Copyright (c) 2002 Hewlett-Packard Co/CERN - Sverre Jarp - - Return: dest - - Inputs: - in0: dest - in1: value - in2: count - - The algorithm is fairly straightforward: set byte by byte until we - we get to a 16B-aligned address, then loop on 128 B chunks using an - early store as prefetching, then loop on 32B chucks, then clear remaining - words, finally clear remaining bytes. - Since a stf.spill f0 can store 16B in one go, we use this instruction - to get peak speed when value = 0. 
*/ - -#include -#include -#undef ret - -#define dest in0 -#define value in1 -#define cnt in2 - -#define tmp r31 -#define save_lc r30 -#define ptr0 r29 -#define ptr1 r28 -#define ptr2 r27 -#define ptr3 r26 -#define ptr9 r24 -#define loopcnt r23 -#define linecnt r22 -#define bytecnt r21 - -#define fvalue f6 - -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches -#define p_nz p7 -#define p_zr p8 -#define p_unalgn p9 -#define p_y p11 -#define p_n p12 -#define p_yy p13 -#define p_nn p14 - -#define MIN1 15 -#define MIN1P1HALF 8 -#define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount -#define PREF_AHEAD 8 - -GLOBAL_ENTRY(memset) -{ .mmi - .prologue - alloc tmp = ar.pfs, 3, 0, 0, 0 - lfetch.nt1 [dest] // - .save ar.lc, save_lc - mov.i save_lc = ar.lc - .body -} { .mmi - mov ret0 = dest // return value - cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero - cmp.eq p_scr, p0 = cnt, r0 -;; } -{ .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for correct alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) -} { .mib - mov ptr1 = dest - mux1 value = value, @brcst // create 8 identical bytes in word -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 -;; } -{ .mib - cmp.ne p_unalgn, p0 = tmp, r0 // -} { .mib - sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) -;; } -{ .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? -;; } -{ .mib -(p_y) add cnt = -8, cnt // -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? 
-} { .mib -(p_y) st8 [ptr2] = value,-4 // -(p_n) add ptr2 = 4, ptr2 // -;; } -{ .mib -(p_yy) add cnt = -4, cnt // -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? -} { .mib -(p_yy) st4 [ptr2] = value,-2 // -(p_nn) add ptr2 = 2, ptr2 // -;; } -{ .mmi - mov tmp = LINE_SIZE+1 // for compare -(p_y) add cnt = -2, cnt // -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? -} { .mmi - setf.sig fvalue=value // transfer value to FLP side -(p_y) st2 [ptr2] = value,-1 // -(p_n) add ptr2 = 1, ptr2 // -;; } - -{ .mmi -(p_yy) st1 [ptr2] = value // - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? -} { .mbb -(p_yy) add cnt = -1, cnt // -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few -;; } - -{ .mib - nop.m 0 - shr.u linecnt = cnt, LSIZE_SH -(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill -;; } - - TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later -{ .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder -} { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value -;; } -{ .mmi -(p_scr) add loopcnt = -1, linecnt // - add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range -;; } -{ .mmi - add tmp = -1, linecnt // next loop count - mov.i ar.lc = loopcnt // -;; } -.pref_l1a: -{ .mib - stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart - nop.i 0 - br.cloop.dptk.few .pref_l1a -;; } -{ .mmi - add ptr0 = 16, ptr2 // Two stores in parallel - mov.i ar.lc = tmp // -;; } -.l1ax: - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 
- ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 32 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? - ;; } -{ .mmb - stf8 [ptr2] = fvalue, 24 -(p_scr) stf8 [ptr9] = fvalue, 128 - br.cloop.dptk.few .l1ax -;; } -{ .mbb - cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 - br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 -;; } - - TEXT_ALIGN(32) -.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later -{ .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder -} { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value -;; } -{ .mmi -(p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range -;; } -{ .mmi - add tmp = -1, linecnt // next loop count - mov.i ar.lc = loopcnt -;; } -.pref_l1b: -{ .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart - nop.i 0 - br.cloop.dptk.few .pref_l1b -;; } -{ .mmi - add ptr0 = 16, ptr2 // Two stores in parallel - mov.i ar.lc = tmp -;; } -.l1bx: - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 32 - ;; } - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 32 - ;; } - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? - ;; } -{ .mmb - stf.spill [ptr2] = f0, 32 -(p_scr) stf.spill [ptr9] = f0, 128 - br.cloop.dptk.few .l1bx -;; } -{ .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? 
-(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // -;; } - -.fraction_of_line: -{ .mib - add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 -;; } -{ .mib - cmp.eq p_scr, p0 = loopcnt, r0 - add loopcnt = -1, loopcnt -(p_scr) br.cond.dpnt.many .store_words -;; } -{ .mib - and cnt = 0x1f, cnt // compute the remaining cnt - mov.i ar.lc = loopcnt -;; } - TEXT_ALIGN(32) -.l2: // ------------------------------------ // L2A: store 32B in 2 cycles -{ .mmb - stf8 [ptr1] = fvalue, 8 - stf8 [ptr2] = fvalue, 8 -;; } { .mmb - stf8 [ptr1] = fvalue, 24 - stf8 [ptr2] = fvalue, 24 - br.cloop.dptk.many .l2 -;; } -.store_words: -{ .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch -;; } - -{ .mmi - stf8 [ptr1] = fvalue, 8 // store - cmp.le p_y, p_n = 16, cnt - add cnt = -8, cnt // subtract -;; } -{ .mmi -(p_y) stf8 [ptr1] = fvalue, 8 // store -(p_y) cmp.le.unc p_yy, p_nn = 16, cnt -(p_y) add cnt = -8, cnt // subtract -;; } -{ .mmi // store -(p_yy) stf8 [ptr1] = fvalue, 8 -(p_yy) add cnt = -8, cnt // subtract -;; } - -.move_bytes_from_alignment: -{ .mib - cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? -(p_scr) br.cond.dpnt.few .restore_and_exit -;; } -{ .mib -(p_y) st4 [ptr1] = value,4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? -;; } -{ .mib -(p_yy) st2 [ptr1] = value,2 - tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? 
-;; } - -{ .mib -(p_y) st1 [ptr1] = value -;; } -.restore_and_exit: -{ .mib - nop.m 0 - mov.i ar.lc = save_lc - br.ret.sptk.many rp -;; } - -.move_bytes_unaligned: -{ .mmi - .pred.rel "mutex",p_y, p_n - .pred.rel "mutex",p_yy, p_nn -(p_n) cmp.le p_yy, p_nn = 4, cnt -(p_y) cmp.le p_yy, p_nn = 5, cnt -(p_n) add ptr2 = 2, ptr1 -} { .mmi -(p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left] -(p_y) add cnt = -1, cnt -;; } -{ .mmi -(p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store - mov.i ar.lc = save_lc -} { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (o less) left] -(p_yy) add cnt = -4, cnt -;; } -{ .mmi -(p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? -} { .mmi -(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left] -(p_y) add cnt = -4, cnt -;; } -{ .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? 
-} { .mmi -(p_yy) add cnt = -4, cnt -;; } -{ .mmb -(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = value // fill last byte (using ptr3) - br.ret.sptk.many rp -} -END(memset) -EXPORT_SYMBOL(memset) diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S deleted file mode 100644 index 1f4a46c15127..000000000000 --- a/arch/ia64/lib/strlen.S +++ /dev/null @@ -1,195 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the standard strlen() function - * - * - * Inputs: - * in0 address of string - * - * Outputs: - * ret0 the number of characters in the string (0 if empty string) - * does not count the \0 - * - * Copyright (C) 1999, 2001 Hewlett-Packard Co - * Stephane Eranian - * - * 09/24/99 S.Eranian add speculation recovery code - */ - -#include -#include - -// -// -// This is an enhanced version of the basic strlen. it includes a combination -// of compute zero index (czx), parallel comparisons, speculative loads and -// loop unroll using rotating registers. -// -// General Ideas about the algorithm: -// The goal is to look at the string in chunks of 8 bytes. -// so we need to do a few extra checks at the beginning because the -// string may not be 8-byte aligned. In this case we load the 8byte -// quantity which includes the start of the string and mask the unused -// bytes with 0xff to avoid confusing czx. -// We use speculative loads and software pipelining to hide memory -// latency and do read ahead safely. This way we defer any exception. -// -// Because we don't want the kernel to be relying on particular -// settings of the DCR register, we provide recovery code in case -// speculation fails. The recovery code is going to "redo" the work using -// only normal loads. If we still get a fault then we generate a -// kernel panic. Otherwise we return the strlen as usual. -// -// The fact that speculation may fail can be caused, for instance, by -// the DCR.dm bit being set. 
In this case TLB misses are deferred, i.e., -// a NaT bit will be set if the translation is not present. The normal -// load, on the other hand, will cause the translation to be inserted -// if the mapping exists. -// -// It should be noted that we execute recovery code only when we need -// to use the data that has been speculatively loaded: we don't execute -// recovery code on pure read ahead data. -// -// Remarks: -// - the cmp r0,r0 is used as a fast way to initialize a predicate -// register to 1. This is required to make sure that we get the parallel -// compare correct. -// -// - we don't use the epilogue counter to exit the loop but we need to set -// it to zero beforehand. -// -// - after the loop we must test for Nat values because neither the -// czx nor cmp instruction raise a NaT consumption fault. We must be -// careful not to look too far for a Nat for which we don't care. -// For instance we don't need to look at a NaT in val2 if the zero byte -// was in val1. -// -// - Clearly performance tuning is required. 
-// -// -// -#define saved_pfs r11 -#define tmp r10 -#define base r16 -#define orig r17 -#define saved_pr r18 -#define src r19 -#define mask r20 -#define val r21 -#define val1 r22 -#define val2 r23 - -GLOBAL_ENTRY(strlen) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8 - - .rotr v[2], w[2] // declares our 4 aliases - - extr.u tmp=in0,0,3 // tmp=least significant 3 bits - mov orig=in0 // keep trackof initial byte address - dep src=0,in0,0,3 // src=8byte-aligned in0 address - .save pr, saved_pr - mov saved_pr=pr // preserve predicates (rotation) - ;; - - .body - - ld8 v[1]=[src],8 // must not speculate: can fail here - shl tmp=tmp,3 // multiply by 8bits/byte - mov mask=-1 // our mask - ;; - ld8.s w[1]=[src],8 // speculatively load next - cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and - sub tmp=64,tmp // how many bits to shift our mask on the right - ;; - shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part - mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) - ;; - add base=-16,src // keep track of aligned base - or v[1]=v[1],mask // now we have a safe initial byte pattern - ;; -1: - ld8.s v[0]=[src],8 // speculatively load next - czx1.r val1=v[1] // search 0 byte from right - czx1.r val2=w[1] // search 0 byte from right following 8bytes - ;; - ld8.s w[0]=[src],8 // speculatively load next to next - cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 - cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 -(p6) br.wtop.dptk 1b // loop until p6 == 0 - ;; - // - // We must return try the recovery code iff - // val1_is_nat || (val1==8 && val2_is_nat) - // - // XXX Fixme - // - there must be a better way of doing the test - // - cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) - tnat.nz p6,p7=val1 // test NaT on val1 -(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT - ;; - // - // if we come here p7 is true, i.e., initialized for // cmp - // - cmp.eq.and p7,p0=8,val1// val1==8? 
- tnat.nz.and p7,p0=val2 // test NaT if val2 -(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT - ;; -(p8) mov val1=val2 // the other test got us out of the loop -(p8) adds src=-16,src // correct position when 3 ahead -(p9) adds src=-24,src // correct position when 4 ahead - ;; - sub ret0=src,orig // distance from base - sub tmp=8,val1 // which byte in word - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // adjust - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of normal execution - - // - // Outlined recovery code when speculation failed - // - // This time we don't use speculation and rely on the normal exception - // mechanism. that's why the loop is not as good as the previous one - // because read ahead is not possible - // - // IMPORTANT: - // Please note that in the case of strlen() as opposed to strlen_user() - // we don't use the exception mechanism, as this function is not - // supposed to fail. If that happens it means we have a bug and the - // code will cause of kernel fault. - // - // XXX Fixme - // - today we restart from the beginning of the string instead - // of trying to continue where we left off. - // -.recover: - ld8 val=[base],8 // will fail if unrecoverable fault - ;; - or val=val,mask // remask first bytes - cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop - ;; - // - // ar.ec is still zero here - // -2: -(p6) ld8 val=[base],8 // will fail if unrecoverable fault - ;; - czx1.r val1=val // search 0 byte from right - ;; - cmp.eq p6,p0=8,val1 // val1==8 ? 
-(p6) br.wtop.dptk 2b // loop until p6 == 0 - ;; // (avoid WAW on p63) - sub ret0=base,orig // distance from base - sub tmp=8,val1 - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // length=now - back -1 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of successful recovery code -END(strlen) -EXPORT_SYMBOL(strlen) diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S deleted file mode 100644 index a287169bd953..000000000000 --- a/arch/ia64/lib/strncpy_from_user.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Just like strncpy() except that if a fault occurs during copying, - * -EFAULT is returned. - * - * Inputs: - * in0: address of destination buffer - * in1: address of string to be copied - * in2: length of buffer in bytes - * Outputs: - * r8: -EFAULT in case of fault or number of bytes copied if no fault - * - * Copyright (C) 1998-2001 Hewlett-Packard Co - * Copyright (C) 1998-2001 David Mosberger-Tang - * - * 00/03/06 D. Mosberger Fixed to return proper return value (bug found by - * by Andreas Schwab ). 
- */ - -#include -#include - -GLOBAL_ENTRY(__strncpy_from_user) - alloc r2=ar.pfs,3,0,0,0 - mov r8=0 - mov r9=in1 - ;; - add r10=in1,in2 - cmp.eq p6,p0=r0,in2 -(p6) br.ret.spnt.many rp - - // XXX braindead copy loop---this needs to be optimized -.Loop1: - EX(.Lexit, ld1 r8=[in1],1) - ;; - EX(.Lexit, st1 [in0]=r8,1) - cmp.ne p6,p7=r8,r0 - ;; -(p6) cmp.ne.unc p8,p0=in1,r10 -(p8) br.cond.dpnt.few .Loop1 - ;; -(p6) mov r8=in2 // buffer filled up---return buffer length -(p7) sub r8=in1,r9,1 // return string length (excluding NUL character) -[.Lexit:] - br.ret.sptk.many rp -END(__strncpy_from_user) -EXPORT_SYMBOL(__strncpy_from_user) diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S deleted file mode 100644 index a7eb56e840a9..000000000000 --- a/arch/ia64/lib/strnlen_user.S +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Returns 0 if exception before NUL or reaching the supplied limit (N), - * a value greater than N if the string is longer than the limit, else - * strlen. 
- * - * Inputs: - * in0: address of buffer - * in1: string length limit N - * Outputs: - * r8: 0 in case of fault, strlen(buffer)+1 otherwise - * - * Copyright (C) 1999, 2001 David Mosberger-Tang - */ - -#include -#include - -GLOBAL_ENTRY(__strnlen_user) - .prologue - alloc r2=ar.pfs,2,0,0,0 - .save ar.lc, r16 - mov r16=ar.lc // preserve ar.lc - - .body - - add r3=-1,in1 - ;; - mov ar.lc=r3 - mov r9=0 - ;; - // XXX braindead strlen loop---this needs to be optimized -.Loop1: - EXCLR(.Lexit, ld1 r8=[in0],1) - add r9=1,r9 - ;; - cmp.eq p6,p0=r8,r0 -(p6) br.cond.dpnt .Lexit - br.cloop.dptk.few .Loop1 - - add r9=1,in1 // NUL not found---return N+1 - ;; -.Lexit: - mov r8=r9 - mov ar.lc=r16 // restore ar.lc - br.ret.sptk.many rp -END(__strnlen_user) -EXPORT_SYMBOL(__strnlen_user) diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S deleted file mode 100644 index 6e2a69662c06..000000000000 --- a/arch/ia64/lib/xor.S +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * arch/ia64/lib/xor.S - * - * Optimized RAID-5 checksumming functions for IA-64. 
- */ - -#include -#include - -GLOBAL_ENTRY(xor_ia64_2) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 3, 0, 13, 16 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov ar.lc = in0 - mov pr.rot = 1 << 16 - ;; - .rotr s1[6+1], s2[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] -(p[6+1])st8.nta [r8] = d[1], 8 - nop.f 0 - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_2) -EXPORT_SYMBOL(xor_ia64_2) - -GLOBAL_ENTRY(xor_ia64_3) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 4, 0, 20, 24 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov r18 = in3 - mov ar.lc = in0 - mov pr.rot = 1 << 16 - ;; - .rotr s1[6+1], s2[6+1], s3[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] - ;; -(p[0]) ld8.nta s3[0] = [r18], 8 -(p[6+1])st8.nta [r8] = d[1], 8 -(p[6]) xor d[0] = d[0], s3[6] - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_3) -EXPORT_SYMBOL(xor_ia64_3) - -GLOBAL_ENTRY(xor_ia64_4) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 5, 0, 27, 32 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov r18 = in3 - mov ar.lc = in0 - mov pr.rot = 1 << 16 - mov r19 = in4 - ;; - .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] 
-(p[0]) ld8.nta s3[0] = [r18], 8 -(p[0]) ld8.nta s4[0] = [r19], 8 -(p[6]) xor r20 = s3[6], s4[6] - ;; -(p[6+1])st8.nta [r8] = d[1], 8 -(p[6]) xor d[0] = d[0], r20 - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_4) -EXPORT_SYMBOL(xor_ia64_4) - -GLOBAL_ENTRY(xor_ia64_5) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 6, 0, 34, 40 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov r18 = in3 - mov ar.lc = in0 - mov pr.rot = 1 << 16 - mov r19 = in4 - mov r20 = in5 - ;; - .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] -(p[0]) ld8.nta s3[0] = [r18], 8 -(p[0]) ld8.nta s4[0] = [r19], 8 -(p[6]) xor r21 = s3[6], s4[6] - ;; -(p[0]) ld8.nta s5[0] = [r20], 8 -(p[6+1])st8.nta [r8] = d[1], 8 -(p[6]) xor d[0] = d[0], r21 - ;; -(p[6]) xor d[0] = d[0], s5[6] - nop.f 0 - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_5) -EXPORT_SYMBOL(xor_ia64_5) diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile deleted file mode 100644 index c03f63c62ac4..000000000000 --- a/arch/ia64/mm/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the ia64-specific parts of the memory manager. -# - -obj-y := init.o fault.o tlb.o extable.o ioremap.o - -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_NUMA) += numa.o -obj-$(CONFIG_SPARSEMEM) += discontig.o -obj-$(CONFIG_FLATMEM) += contig.o diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c deleted file mode 100644 index 1e9eaa107eb7..000000000000 --- a/arch/ia64/mm/contig.c +++ /dev/null @@ -1,208 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. 
See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 2000, Rohit Seth - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved. - * - * Routines used by ia64 machines with contiguous (or virtually contiguous) - * memory. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* physical address where the bootmem map is located */ -unsigned long bootmap_start; - -#ifdef CONFIG_SMP -static void *cpu_data; -/** - * per_cpu_init - setup per-cpu variables - * - * Allocate and setup per-cpu data areas. - */ -void *per_cpu_init(void) -{ - static bool first_time = true; - void *cpu0_data = __cpu0_per_cpu; - unsigned int cpu; - - if (!first_time) - goto skip; - first_time = false; - - /* - * get_free_pages() cannot be used before cpu_init() done. - * BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs - * to avoid that AP calls get_zeroed_page(). - */ - for_each_possible_cpu(cpu) { - void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start; - - memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start; - per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; - - /* - * percpu area for cpu0 is moved from the __init area - * which is setup by head.S and used till this point. - * Update ar.k3. This move is ensures that percpu - * area for cpu0 is on the correct node and its - * virtual address isn't insanely far from other - * percpu areas which is important for congruent - * percpu allocator. 
- */ - if (cpu == 0) - ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) - - (unsigned long)__per_cpu_start); - - cpu_data += PERCPU_PAGE_SIZE; - } -skip: - return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; -} - -static inline __init void -alloc_per_cpu_data(void) -{ - size_t size = PERCPU_PAGE_SIZE * num_possible_cpus(); - - cpu_data = memblock_alloc_from(size, PERCPU_PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!cpu_data) - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", - __func__, size, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); -} - -/** - * setup_per_cpu_areas - setup percpu areas - * - * Arch code has already allocated and initialized percpu areas. All - * this function has to do is to teach the determined layout to the - * dynamic percpu allocator, which happens to be more complex than - * creating whole new ones using helpers. - */ -void __init -setup_per_cpu_areas(void) -{ - struct pcpu_alloc_info *ai; - struct pcpu_group_info *gi; - unsigned int cpu; - ssize_t static_size, reserved_size, dyn_size; - - ai = pcpu_alloc_alloc_info(1, num_possible_cpus()); - if (!ai) - panic("failed to allocate pcpu_alloc_info"); - gi = &ai->groups[0]; - - /* units are assigned consecutively to possible cpus */ - for_each_possible_cpu(cpu) - gi->cpu_map[gi->nr_units++] = cpu; - - /* set parameters */ - static_size = __per_cpu_end - __per_cpu_start; - reserved_size = PERCPU_MODULE_RESERVE; - dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; - if (dyn_size < 0) - panic("percpu area overflow static=%zd reserved=%zd\n", - static_size, reserved_size); - - ai->static_size = static_size; - ai->reserved_size = reserved_size; - ai->dyn_size = dyn_size; - ai->unit_size = PERCPU_PAGE_SIZE; - ai->atom_size = PAGE_SIZE; - ai->alloc_size = PERCPU_PAGE_SIZE; - - pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]); - pcpu_free_alloc_info(ai); -} -#else -#define alloc_per_cpu_data() do { } while (0) -#endif /* CONFIG_SMP */ - -/** - * 
find_memory - setup memory map - * - * Walk the EFI memory map and find usable memory for the system, taking - * into account reserved areas. - */ -void __init -find_memory (void) -{ - reserve_memory(); - - /* first find highest page frame number */ - min_low_pfn = ~0UL; - max_low_pfn = 0; - efi_memmap_walk(find_max_min_low_pfn, NULL); - max_pfn = max_low_pfn; - - memblock_add_node(0, PFN_PHYS(max_low_pfn), 0, MEMBLOCK_NONE); - - find_initrd(); - - alloc_per_cpu_data(); -} - -static int __init find_largest_hole(u64 start, u64 end, void *arg) -{ - u64 *max_gap = arg; - - static u64 last_end = PAGE_OFFSET; - - /* NOTE: this algorithm assumes efi memmap table is ordered */ - - if (*max_gap < (start - last_end)) - *max_gap = start - last_end; - last_end = end; - return 0; -} - -static void __init verify_gap_absence(void) -{ - unsigned long max_gap; - - /* Forbid FLATMEM if hole is > than 1G */ - efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); - if (max_gap >= SZ_1G) - panic("Cannot use FLATMEM with %ldMB hole\n" - "Please switch over to SPARSEMEM\n", - (max_gap >> 20)); -} - -/* - * Set up the page tables. - */ - -void __init -paging_init (void) -{ - unsigned long max_dma; - unsigned long max_zone_pfns[MAX_NR_ZONES]; - - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - max_zone_pfns[ZONE_DMA32] = max_dma; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - - verify_gap_absence(); - - free_area_init(max_zone_pfns); - zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); -} diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c deleted file mode 100644 index 73d0db36edb6..000000000000 --- a/arch/ia64/mm/discontig.c +++ /dev/null @@ -1,635 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved. - * Copyright (c) 2001 Intel Corp. - * Copyright (c) 2001 Tony Luck - * Copyright (c) 2002 NEC Corp. 
- * Copyright (c) 2002 Kimio Suganuma - * Copyright (c) 2004 Silicon Graphics, Inc - * Russ Anderson - * Jesse Barnes - * Jack Steiner - */ - -/* - * Platform initialization for Discontig Memory - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Track per-node information needed to setup the boot memory allocator, the - * per-node areas, and the real VM. - */ -struct early_node_data { - struct ia64_node_data *node_data; - unsigned long pernode_addr; - unsigned long pernode_size; - unsigned long min_pfn; - unsigned long max_pfn; -}; - -static struct early_node_data mem_data[MAX_NUMNODES] __initdata; -static nodemask_t memory_less_mask __initdata; - -pg_data_t *pgdat_list[MAX_NUMNODES]; - -/* - * To prevent cache aliasing effects, align per-node structures so that they - * start at addresses that are strided by node number. - */ -#define MAX_NODE_ALIGN_OFFSET (32 * 1024 * 1024) -#define NODEDATA_ALIGN(addr, node) \ - ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + \ - (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1))) - -/** - * build_node_maps - callback to setup mem_data structs for each node - * @start: physical start of range - * @len: length of range - * @node: node where this range resides - * - * Detect extents of each piece of memory that we wish to - * treat as a virtually contiguous block (i.e. each node). Each such block - * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down - * if necessary. Any non-existent pages will simply be part of the virtual - * memmap. 
- */ -static int __init build_node_maps(unsigned long start, unsigned long len, - int node) -{ - unsigned long spfn, epfn, end = start + len; - - epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT; - spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT; - - if (!mem_data[node].min_pfn) { - mem_data[node].min_pfn = spfn; - mem_data[node].max_pfn = epfn; - } else { - mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn); - mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn); - } - - return 0; -} - -/** - * early_nr_cpus_node - return number of cpus on a given node - * @node: node to check - * - * Count the number of cpus on @node. We can't use nr_cpus_node() yet because - * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been - * called yet. Note that node 0 will also count all non-existent cpus. - */ -static int early_nr_cpus_node(int node) -{ - int cpu, n = 0; - - for_each_possible_early_cpu(cpu) - if (node == node_cpuid[cpu].nid) - n++; - - return n; -} - -/** - * compute_pernodesize - compute size of pernode data - * @node: the node id. - */ -static unsigned long compute_pernodesize(int node) -{ - unsigned long pernodesize = 0, cpus; - - cpus = early_nr_cpus_node(node); - pernodesize += PERCPU_PAGE_SIZE * cpus; - pernodesize += node * L1_CACHE_BYTES; - pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); - pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); - pernodesize = PAGE_ALIGN(pernodesize); - return pernodesize; -} - -/** - * per_cpu_node_setup - setup per-cpu areas on each node - * @cpu_data: per-cpu area on this node - * @node: node to setup - * - * Copy the static per-cpu data into the region we just set aside and then - * setup __per_cpu_offset for each CPU on this node. Return a pointer to - * the end of the area. - */ -static void *per_cpu_node_setup(void *cpu_data, int node) -{ -#ifdef CONFIG_SMP - int cpu; - - for_each_possible_early_cpu(cpu) { - void *src = cpu == 0 ? 
__cpu0_per_cpu : __phys_per_cpu_start; - - if (node != node_cpuid[cpu].nid) - continue; - - memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char *)__va(cpu_data) - - __per_cpu_start; - - /* - * percpu area for cpu0 is moved from the __init area - * which is setup by head.S and used till this point. - * Update ar.k3. This move is ensures that percpu - * area for cpu0 is on the correct node and its - * virtual address isn't insanely far from other - * percpu areas which is important for congruent - * percpu allocator. - */ - if (cpu == 0) - ia64_set_kr(IA64_KR_PER_CPU_DATA, - (unsigned long)cpu_data - - (unsigned long)__per_cpu_start); - - cpu_data += PERCPU_PAGE_SIZE; - } -#endif - return cpu_data; -} - -#ifdef CONFIG_SMP -/** - * setup_per_cpu_areas - setup percpu areas - * - * Arch code has already allocated and initialized percpu areas. All - * this function has to do is to teach the determined layout to the - * dynamic percpu allocator, which happens to be more complex than - * creating whole new ones using helpers. 
- */ -void __init setup_per_cpu_areas(void) -{ - struct pcpu_alloc_info *ai; - struct pcpu_group_info *gi; - unsigned int *cpu_map; - void *base; - unsigned long base_offset; - unsigned int cpu; - ssize_t static_size, reserved_size, dyn_size; - int node, prev_node, unit, nr_units; - - ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids); - if (!ai) - panic("failed to allocate pcpu_alloc_info"); - cpu_map = ai->groups[0].cpu_map; - - /* determine base */ - base = (void *)ULONG_MAX; - for_each_possible_cpu(cpu) - base = min(base, - (void *)(__per_cpu_offset[cpu] + __per_cpu_start)); - base_offset = (void *)__per_cpu_start - base; - - /* build cpu_map, units are grouped by node */ - unit = 0; - for_each_node(node) - for_each_possible_cpu(cpu) - if (node == node_cpuid[cpu].nid) - cpu_map[unit++] = cpu; - nr_units = unit; - - /* set basic parameters */ - static_size = __per_cpu_end - __per_cpu_start; - reserved_size = PERCPU_MODULE_RESERVE; - dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; - if (dyn_size < 0) - panic("percpu area overflow static=%zd reserved=%zd\n", - static_size, reserved_size); - - ai->static_size = static_size; - ai->reserved_size = reserved_size; - ai->dyn_size = dyn_size; - ai->unit_size = PERCPU_PAGE_SIZE; - ai->atom_size = PAGE_SIZE; - ai->alloc_size = PERCPU_PAGE_SIZE; - - /* - * CPUs are put into groups according to node. Walk cpu_map - * and create new groups at node boundaries. - */ - prev_node = NUMA_NO_NODE; - ai->nr_groups = 0; - for (unit = 0; unit < nr_units; unit++) { - cpu = cpu_map[unit]; - node = node_cpuid[cpu].nid; - - if (node == prev_node) { - gi->nr_units++; - continue; - } - prev_node = node; - - gi = &ai->groups[ai->nr_groups++]; - gi->nr_units = 1; - gi->base_offset = __per_cpu_offset[cpu] + base_offset; - gi->cpu_map = &cpu_map[unit]; - } - - pcpu_setup_first_chunk(ai, base); - pcpu_free_alloc_info(ai); -} -#endif - -/** - * fill_pernode - initialize pernode data. - * @node: the node id. 
- * @pernode: physical address of pernode data - * @pernodesize: size of the pernode data - */ -static void __init fill_pernode(int node, unsigned long pernode, - unsigned long pernodesize) -{ - void *cpu_data; - int cpus = early_nr_cpus_node(node); - - mem_data[node].pernode_addr = pernode; - mem_data[node].pernode_size = pernodesize; - memset(__va(pernode), 0, pernodesize); - - cpu_data = (void *)pernode; - pernode += PERCPU_PAGE_SIZE * cpus; - pernode += node * L1_CACHE_BYTES; - - pgdat_list[node] = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - mem_data[node].node_data = __va(pernode); - pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); - - cpu_data = per_cpu_node_setup(cpu_data, node); - - return; -} - -/** - * find_pernode_space - allocate memory for memory map and per-node structures - * @start: physical start of range - * @len: length of range - * @node: node where this range resides - * - * This routine reserves space for the per-cpu data struct, the list of - * pg_data_ts and the per-node data struct. Each node will have something like - * the following in the first chunk of addr. space large enough to hold it. - * - * ________________________ - * | | - * |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first - * | PERCPU_PAGE_SIZE * | start and length big enough - * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus. - * |------------------------| - * | local pg_data_t * | - * |------------------------| - * | local ia64_node_data | - * |------------------------| - * | ??? | - * |________________________| - * - * Once this space has been set aside, the bootmem maps are initialized. We - * could probably move the allocation of the per-cpu and ia64_node_data space - * outside of this function and use alloc_bootmem_node(), but doing it here - * is straightforward and we get the alignments we want so... 
- */ -static int __init find_pernode_space(unsigned long start, unsigned long len, - int node) -{ - unsigned long spfn, epfn; - unsigned long pernodesize = 0, pernode; - - spfn = start >> PAGE_SHIFT; - epfn = (start + len) >> PAGE_SHIFT; - - /* - * Make sure this memory falls within this node's usable memory - * since we may have thrown some away in build_maps(). - */ - if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn) - return 0; - - /* Don't setup this node's local space twice... */ - if (mem_data[node].pernode_addr) - return 0; - - /* - * Calculate total size needed, incl. what's necessary - * for good alignment and alias prevention. - */ - pernodesize = compute_pernodesize(node); - pernode = NODEDATA_ALIGN(start, node); - - /* Is this range big enough for what we want to store here? */ - if (start + len > (pernode + pernodesize)) - fill_pernode(node, pernode, pernodesize); - - return 0; -} - -/** - * reserve_pernode_space - reserve memory for per-node space - * - * Reserve the space used by the bootmem maps & per-node space in the boot - * allocator so that when we actually create the real mem maps we don't - * use their memory. - */ -static void __init reserve_pernode_space(void) -{ - unsigned long base, size; - int node; - - for_each_online_node(node) { - if (node_isset(node, memory_less_mask)) - continue; - - /* Now the per-node space */ - size = mem_data[node].pernode_size; - base = __pa(mem_data[node].pernode_addr); - memblock_reserve(base, size); - } -} - -static void scatter_node_data(void) -{ - pg_data_t **dst; - int node; - - /* - * for_each_online_node() can't be used at here. - * node_online_map is not set for hot-added nodes at this time, - * because we are halfway through initialization of the new node's - * structures. If for_each_online_node() is used, a new node's - * pg_data_ptrs will be not initialized. Instead of using it, - * pgdat_list[] is checked. 
- */ - for_each_node(node) { - if (pgdat_list[node]) { - dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs; - memcpy(dst, pgdat_list, sizeof(pgdat_list)); - } - } -} - -/** - * initialize_pernode_data - fixup per-cpu & per-node pointers - * - * Each node's per-node area has a copy of the global pg_data_t list, so - * we copy that to each node here, as well as setting the per-cpu pointer - * to the local node data structure. - */ -static void __init initialize_pernode_data(void) -{ - int cpu, node; - - scatter_node_data(); - -#ifdef CONFIG_SMP - /* Set the node_data pointer for each per-cpu struct */ - for_each_possible_early_cpu(cpu) { - node = node_cpuid[cpu].nid; - per_cpu(ia64_cpu_info, cpu).node_data = - mem_data[node].node_data; - } -#else - { - struct cpuinfo_ia64 *cpu0_cpu_info; - cpu = 0; - node = node_cpuid[cpu].nid; - cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + - ((char *)&ia64_cpu_info - __per_cpu_start)); - cpu0_cpu_info->node_data = mem_data[node].node_data; - } -#endif /* CONFIG_SMP */ -} - -/** - * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit - * node but fall back to any other node when __alloc_bootmem_node fails - * for best. 
- * @nid: node id - * @pernodesize: size of this node's pernode data - */ -static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) -{ - void *ptr = NULL; - u8 best = 0xff; - int bestnode = NUMA_NO_NODE, node, anynode = 0; - - for_each_online_node(node) { - if (node_isset(node, memory_less_mask)) - continue; - else if (node_distance(nid, node) < best) { - best = node_distance(nid, node); - bestnode = node; - } - anynode = node; - } - - if (bestnode == NUMA_NO_NODE) - bestnode = anynode; - - ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE, - __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_ACCESSIBLE, - bestnode); - if (!ptr) - panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%lx\n", - __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode, - __pa(MAX_DMA_ADDRESS)); - - return ptr; -} - -/** - * memory_less_nodes - allocate and initialize CPU only nodes pernode - * information. - */ -static void __init memory_less_nodes(void) -{ - unsigned long pernodesize; - void *pernode; - int node; - - for_each_node_mask(node, memory_less_mask) { - pernodesize = compute_pernodesize(node); - pernode = memory_less_node_alloc(node, pernodesize); - fill_pernode(node, __pa(pernode), pernodesize); - } - - return; -} - -/** - * find_memory - walk the EFI memory map and setup the bootmem allocator - * - * Called early in boot to setup the bootmem allocator, and to - * allocate the per-cpu and per-node structures. 
- */ -void __init find_memory(void) -{ - int node; - - reserve_memory(); - efi_memmap_walk(filter_memory, register_active_ranges); - - if (num_online_nodes() == 0) { - printk(KERN_ERR "node info missing!\n"); - node_set_online(0); - } - - nodes_or(memory_less_mask, memory_less_mask, node_online_map); - min_low_pfn = -1; - max_low_pfn = 0; - - /* These actually end up getting called by call_pernode_memory() */ - efi_memmap_walk(filter_rsvd_memory, build_node_maps); - efi_memmap_walk(filter_rsvd_memory, find_pernode_space); - efi_memmap_walk(find_max_min_low_pfn, NULL); - - for_each_online_node(node) - if (mem_data[node].min_pfn) - node_clear(node, memory_less_mask); - - reserve_pernode_space(); - memory_less_nodes(); - initialize_pernode_data(); - - max_pfn = max_low_pfn; - - find_initrd(); -} - -#ifdef CONFIG_SMP -/** - * per_cpu_init - setup per-cpu variables - * - * find_pernode_space() does most of this already, we just need to set - * local_per_cpu_offset - */ -void *per_cpu_init(void) -{ - int cpu; - static int first_time = 1; - - if (first_time) { - first_time = 0; - for_each_possible_early_cpu(cpu) - per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; - } - - return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; -} -#endif /* CONFIG_SMP */ - -/** - * call_pernode_memory - use SRAT to call callback functions with node info - * @start: physical start of range - * @len: length of range - * @arg: function to call for each range - * - * efi_memmap_walk() knows nothing about layout of memory across nodes. Find - * out to which node a block of memory belongs. Ignore memory that we cannot - * identify, and split blocks that run across multiple nodes. - * - * Take this opportunity to round the start address up and the end address - * down to page boundaries. 
- */ -void call_pernode_memory(unsigned long start, unsigned long len, void *arg) -{ - unsigned long rs, re, end = start + len; - void (*func)(unsigned long, unsigned long, int); - int i; - - start = PAGE_ALIGN(start); - end &= PAGE_MASK; - if (start >= end) - return; - - func = arg; - - if (!num_node_memblks) { - /* No SRAT table, so assume one node (node 0) */ - if (start < end) - (*func)(start, end - start, 0); - return; - } - - for (i = 0; i < num_node_memblks; i++) { - rs = max(start, node_memblk[i].start_paddr); - re = min(end, node_memblk[i].start_paddr + - node_memblk[i].size); - - if (rs < re) - (*func)(rs, re - rs, node_memblk[i].nid); - - if (re == end) - break; - } -} - -/** - * paging_init - setup page tables - * - * paging_init() sets up the page tables for each node of the system and frees - * the bootmem allocator memory for general use. - */ -void __init paging_init(void) -{ - unsigned long max_dma; - unsigned long max_zone_pfns[MAX_NR_ZONES]; - - max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - sparse_init(); - - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA32] = max_dma; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - free_area_init(max_zone_pfns); - - zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); -} - -pg_data_t * __init arch_alloc_nodedata(int nid) -{ - unsigned long size = compute_pernodesize(nid); - - return memblock_alloc(size, SMP_CACHE_BYTES); -} - -void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) -{ - pgdat_list[update_node] = update_pgdat; - scatter_node_data(); -} - -#ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, - struct vmem_altmap *altmap) -{ - return vmemmap_populate_basepages(start, end, node, NULL); -} - -void vmemmap_free(unsigned long start, unsigned long end, - struct vmem_altmap *altmap) -{ -} -#endif diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c deleted file mode 
100644 index da477c11770b..000000000000 --- a/arch/ia64/mm/extable.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Kernel exception handling table support. Derived from arch/alpha/mm/extable.c. - * - * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include -#include -#include -#include - -void -ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e) -{ - long fix = (u64) &e->fixup + e->fixup; - - regs->r8 = -EFAULT; - if (fix & 4) - regs->r9 = 0; - regs->cr_iip = fix & ~0xf; - ia64_psr(regs)->ri = fix & 0x3; /* set continuation slot number */ -} diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c deleted file mode 100644 index 5458b52b4009..000000000000 --- a/arch/ia64/mm/fault.c +++ /dev/null @@ -1,251 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * MMU fault handling support. - * - * Copyright (C) 1998-2002 Hewlett-Packard Co - * David Mosberger-Tang - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern int die(char *, struct pt_regs *, long); - -/* - * Return TRUE if ADDRESS points at a page in the kernel's mapped segment - * (inside region 5, on ia64) and that page is present. 
- */ -static int -mapped_kernel_page_is_present (unsigned long address) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep, pte; - - pgd = pgd_offset_k(address); - if (pgd_none(*pgd) || pgd_bad(*pgd)) - return 0; - - p4d = p4d_offset(pgd, address); - if (p4d_none(*p4d) || p4d_bad(*p4d)) - return 0; - - pud = pud_offset(p4d, address); - if (pud_none(*pud) || pud_bad(*pud)) - return 0; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd) || pmd_bad(*pmd)) - return 0; - - ptep = pte_offset_kernel(pmd, address); - if (!ptep) - return 0; - - pte = *ptep; - return pte_present(pte); -} - -# define VM_READ_BIT 0 -# define VM_WRITE_BIT 1 -# define VM_EXEC_BIT 2 - -void __kprobes -ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) -{ - int signal = SIGSEGV, code = SEGV_MAPERR; - struct vm_area_struct *vma, *prev_vma; - struct mm_struct *mm = current->mm; - unsigned long mask; - vm_fault_t fault; - unsigned int flags = FAULT_FLAG_DEFAULT; - - mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) - | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); - - /* mmap_lock is performance critical.... */ - prefetchw(&mm->mmap_lock); - - /* - * If we're in an interrupt or have no user context, we must not take the fault.. 
- */ - if (faulthandler_disabled() || !mm) - goto no_context; - - /* - * This is to handle the kprobes on user space access instructions - */ - if (kprobe_page_fault(regs, TRAP_BRKPT)) - return; - - if (user_mode(regs)) - flags |= FAULT_FLAG_USER; - if (mask & VM_WRITE) - flags |= FAULT_FLAG_WRITE; - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); -retry: - mmap_read_lock(mm); - - vma = find_vma_prev(mm, address, &prev_vma); - if (!vma && !prev_vma ) - goto bad_area; - - /* - * find_vma_prev() returns vma such that address < vma->vm_end or NULL - * - * May find no vma, but could be that the last vm area is the - * register backing store that needs to expand upwards, in - * this case vma will be null, but prev_vma will ne non-null - */ - if (( !vma && prev_vma ) || (address < vma->vm_start) ) { - vma = expand_stack(mm, address); - if (!vma) - goto bad_area_nosemaphore; - } - - code = SEGV_ACCERR; - - /* OK, we've got a good vm_area for this memory area. Check the access permissions: */ - -# if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \ - || (1 << VM_EXEC_BIT) != VM_EXEC) -# error File is out of sync with . Please update. -# endif - - if (((isr >> IA64_ISR_R_BIT) & 1UL) && (!(vma->vm_flags & (VM_READ | VM_WRITE)))) - goto bad_area; - - if ((vma->vm_flags & mask) != mask) - goto bad_area; - - /* - * If for any reason at all we couldn't handle the fault, make - * sure we exit gracefully rather than endlessly redo the - * fault. - */ - fault = handle_mm_fault(vma, address, flags, regs); - - if (fault_signal_pending(fault, regs)) { - if (!user_mode(regs)) - goto no_context; - return; - } - - /* The fault is fully completed (including releasing mmap lock) */ - if (fault & VM_FAULT_COMPLETED) - return; - - if (unlikely(fault & VM_FAULT_ERROR)) { - /* - * We ran out of memory, or some other thing happened - * to us that made us unable to handle the page fault - * gracefully. 
- */ - if (fault & VM_FAULT_OOM) { - goto out_of_memory; - } else if (fault & VM_FAULT_SIGSEGV) { - goto bad_area; - } else if (fault & VM_FAULT_SIGBUS) { - signal = SIGBUS; - goto bad_area; - } - BUG(); - } - - if (fault & VM_FAULT_RETRY) { - flags |= FAULT_FLAG_TRIED; - - /* No need to mmap_read_unlock(mm) as we would - * have already released it in __lock_page_or_retry - * in mm/filemap.c. - */ - - goto retry; - } - - mmap_read_unlock(mm); - return; - - bad_area: - mmap_read_unlock(mm); - bad_area_nosemaphore: - if ((isr & IA64_ISR_SP) - || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) - { - /* - * This fault was due to a speculative load or lfetch.fault, set the "ed" - * bit in the psr to ensure forward progress. (Target register will get a - * NaT for ld.s, lfetch will be canceled.) - */ - ia64_psr(regs)->ed = 1; - return; - } - if (user_mode(regs)) { - force_sig_fault(signal, code, (void __user *) address, - 0, __ISR_VALID, isr); - return; - } - - no_context: - if ((isr & IA64_ISR_SP) - || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) - { - /* - * This fault was due to a speculative load or lfetch.fault, set the "ed" - * bit in the psr to ensure forward progress. (Target register will get a - * NaT for ld.s, lfetch will be canceled.) - */ - ia64_psr(regs)->ed = 1; - return; - } - - /* - * Since we have no vma's for region 5, we might get here even if the address is - * valid, due to the VHPT walker inserting a non present translation that becomes - * stale. If that happens, the non present fault handler already purged the stale - * translation, which fixed the problem. So, we check to see if the translation is - * valid, and return if it is. - */ - if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address)) - return; - - if (ia64_done_with_exception(regs)) - return; - - /* - * Oops. The kernel tried to access some bad page. We'll have to terminate things - * with extreme prejudice. 
- */ - bust_spinlocks(1); - - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address); - else - printk(KERN_ALERT "Unable to handle kernel paging request at " - "virtual address %016lx\n", address); - if (die("Oops", regs, isr)) - regs = NULL; - bust_spinlocks(0); - if (regs) - make_task_dead(SIGKILL); - return; - - out_of_memory: - mmap_read_unlock(mm); - if (!user_mode(regs)) - goto no_context; - pagefault_out_of_memory(); -} diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c deleted file mode 100644 index adc49f2d22e8..000000000000 --- a/arch/ia64/mm/hugetlbpage.c +++ /dev/null @@ -1,186 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * IA-64 Huge TLB Page Support for Kernel. - * - * Copyright (C) 2002-2004 Rohit Seth - * Copyright (C) 2003-2004 Ken Chen - * - * Sep, 2003: add numa support - * Feb, 2004: dynamic hugetlb page size via boot parameter - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT; -EXPORT_SYMBOL(hpage_shift); - -pte_t * -huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long sz) -{ - unsigned long taddr = htlbpage_to_page(addr); - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, taddr); - p4d = p4d_offset(pgd, taddr); - pud = pud_alloc(mm, p4d, taddr); - if (pud) { - pmd = pmd_alloc(mm, pud, taddr); - if (pmd) - pte = pte_alloc_huge(mm, pmd, taddr); - } - return pte; -} - -pte_t * -huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz) -{ - unsigned long taddr = htlbpage_to_page(addr); - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, taddr); - if (pgd_present(*pgd)) { - p4d = p4d_offset(pgd, taddr); - if (p4d_present(*p4d)) { - pud = pud_offset(p4d, taddr); - if 
(pud_present(*pud)) { - pmd = pmd_offset(pud, taddr); - if (pmd_present(*pmd)) - pte = pte_offset_huge(pmd, taddr); - } - } - } - - return pte; -} - -#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } - -/* - * Don't actually need to do any preparation, but need to make sure - * the address is in the right region. - */ -int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len) -{ - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - if (REGION_NUMBER(addr) != RGN_HPAGE) - return -EINVAL; - - return 0; -} - -int pmd_huge(pmd_t pmd) -{ - return 0; -} - -int pud_huge(pud_t pud) -{ - return 0; -} - -void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) -{ - /* - * This is called to free hugetlb page tables. - * - * The offset of these addresses from the base of the hugetlb - * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that - * the standard free_pgd_range will free the right page tables. - * - * If floor and ceiling are also in the hugetlb region, they - * must likewise be scaled down; but if outside, left unchanged. - */ - - addr = htlbpage_to_page(addr); - end = htlbpage_to_page(end); - if (REGION_NUMBER(floor) == RGN_HPAGE) - floor = htlbpage_to_page(floor); - if (REGION_NUMBER(ceiling) == RGN_HPAGE) - ceiling = htlbpage_to_page(ceiling); - - free_pgd_range(tlb, addr, end, floor, ceiling); -} - -unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct vm_unmapped_area_info info; - - if (len > RGN_MAP_LIMIT) - return -ENOMEM; - if (len & ~HPAGE_MASK) - return -EINVAL; - - /* Handle MAP_FIXED */ - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(file, addr, len)) - return -EINVAL; - return addr; - } - - /* This code assumes that RGN_HPAGE != 0. 
*/ - if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1))) - addr = HPAGE_REGION_BASE; - - info.flags = 0; - info.length = len; - info.low_limit = addr; - info.high_limit = HPAGE_REGION_BASE + RGN_MAP_LIMIT; - info.align_mask = PAGE_MASK & (HPAGE_SIZE - 1); - info.align_offset = 0; - return vm_unmapped_area(&info); -} - -static int __init hugetlb_setup_sz(char *str) -{ - u64 tr_pages; - unsigned long long size; - - if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0) - /* - * shouldn't happen, but just in case. - */ - tr_pages = 0x15557000UL; - - size = memparse(str, &str); - if (*str || !is_power_of_2(size) || !(tr_pages & size) || - size <= PAGE_SIZE || - size > (1UL << PAGE_SHIFT << MAX_ORDER)) { - printk(KERN_WARNING "Invalid huge page size specified\n"); - return 1; - } - - hpage_shift = __ffs(size); - /* - * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT - * override here with new page shift. - */ - ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2); - return 0; -} -early_param("hugepagesz", hugetlb_setup_sz); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c deleted file mode 100644 index 05b0f2f0c073..000000000000 --- a/arch/ia64/mm/init.c +++ /dev/null @@ -1,532 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Initialize MMU support. 
- * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern void ia64_tlb_init (void); - -unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; - -struct page *zero_page_memmap_ptr; /* map entry for zero page */ -EXPORT_SYMBOL(zero_page_memmap_ptr); - -void -__ia64_sync_icache_dcache (pte_t pte) -{ - unsigned long addr; - struct folio *folio; - - folio = page_folio(pte_page(pte)); - addr = (unsigned long)folio_address(folio); - - if (test_bit(PG_arch_1, &folio->flags)) - return; /* i-cache is already coherent with d-cache */ - - flush_icache_range(addr, addr + folio_size(folio)); - set_bit(PG_arch_1, &folio->flags); /* mark page as clean */ -} - -/* - * Since DMA is i-cache coherent, any (complete) folios that were written via - * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to - * flush them when they get mapped into an executable vm-area. 
- */ -void arch_dma_mark_clean(phys_addr_t paddr, size_t size) -{ - unsigned long pfn = PHYS_PFN(paddr); - struct folio *folio = page_folio(pfn_to_page(pfn)); - ssize_t left = size; - size_t offset = offset_in_folio(folio, paddr); - - if (offset) { - left -= folio_size(folio) - offset; - if (left <= 0) - return; - folio = folio_next(folio); - } - - while (left >= (ssize_t)folio_size(folio)) { - left -= folio_size(folio); - set_bit(PG_arch_1, &pfn_to_page(pfn)->flags); - if (!left) - break; - folio = folio_next(folio); - } -} - -inline void -ia64_set_rbs_bot (void) -{ - unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16; - - if (stack_size > MAX_USER_STACK_SIZE) - stack_size = MAX_USER_STACK_SIZE; - current->thread.rbs_bot = PAGE_ALIGN(current->mm->start_stack - stack_size); -} - -/* - * This performs some platform-dependent address space initialization. - * On IA-64, we want to setup the VM area for the register backing - * store (which grows upwards) and install the gateway page which is - * used for signal trampolines, etc. - */ -void -ia64_init_addr_space (void) -{ - struct vm_area_struct *vma; - - ia64_set_rbs_bot(); - - /* - * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore - * the problem. When the process attempts to write to the register backing store - * for the first time, it will get a SEGFAULT in this case. 
- */ - vma = vm_area_alloc(current->mm); - if (vma) { - vma_set_anonymous(vma); - vma->vm_start = current->thread.rbs_bot & PAGE_MASK; - vma->vm_end = vma->vm_start + PAGE_SIZE; - vm_flags_init(vma, VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT); - vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - mmap_write_lock(current->mm); - if (insert_vm_struct(current->mm, vma)) { - mmap_write_unlock(current->mm); - vm_area_free(vma); - return; - } - mmap_write_unlock(current->mm); - } - - /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ - if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = vm_area_alloc(current->mm); - if (vma) { - vma_set_anonymous(vma); - vma->vm_end = PAGE_SIZE; - vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); - vm_flags_init(vma, VM_READ | VM_MAYREAD | VM_IO | - VM_DONTEXPAND | VM_DONTDUMP); - mmap_write_lock(current->mm); - if (insert_vm_struct(current->mm, vma)) { - mmap_write_unlock(current->mm); - vm_area_free(vma); - return; - } - mmap_write_unlock(current->mm); - } - } -} - -void -free_initmem (void) -{ - free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end), - -1, "unused kernel"); -} - -void __init -free_initrd_mem (unsigned long start, unsigned long end) -{ - /* - * EFI uses 4KB pages while the kernel can use 4KB or bigger. - * Thus EFI and the kernel may have different page sizes. It is - * therefore possible to have the initrd share the same page as - * the end of the kernel (given current setup). 
- * - * To avoid freeing/using the wrong page (kernel sized) we: - * - align up the beginning of initrd - * - align down the end of initrd - * - * | | - * |=============| a000 - * | | - * | | - * | | 9000 - * |/////////////| - * |/////////////| - * |=============| 8000 - * |///INITRD////| - * |/////////////| - * |/////////////| 7000 - * | | - * |KKKKKKKKKKKKK| - * |=============| 6000 - * |KKKKKKKKKKKKK| - * |KKKKKKKKKKKKK| - * K=kernel using 8KB pages - * - * In this example, we must free page 8000 ONLY. So we must align up - * initrd_start and keep initrd_end as is. - */ - start = PAGE_ALIGN(start); - end = end & PAGE_MASK; - - if (start < end) - printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - if (!virt_addr_valid(start)) - continue; - free_reserved_page(virt_to_page(start)); - } -} - -/* - * This installs a clean page in the kernel's page table. - */ -static struct page * __init -put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ - - { - p4d = p4d_alloc(&init_mm, pgd, address); - if (!p4d) - goto out; - pud = pud_alloc(&init_mm, p4d, address); - if (!pud) - goto out; - pmd = pmd_alloc(&init_mm, pud, address); - if (!pmd) - goto out; - pte = pte_alloc_kernel(pmd, address); - if (!pte) - goto out; - if (!pte_none(*pte)) - goto out; - set_pte(pte, mk_pte(page, pgprot)); - } - out: - /* no need for flush_tlb */ - return page; -} - -static void __init -setup_gate (void) -{ - struct page *page; - - /* - * Map the gate page twice: once read-only to export the ELF - * headers etc. 
and once execute-only page to enable - * privilege-promotion via "epc": - */ - page = virt_to_page(ia64_imva(__start_gate_section)); - put_kernel_page(page, GATE_ADDR, PAGE_READONLY); -#ifdef HAVE_BUGGY_SEGREL - page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE)); - put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE); -#else - put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE); - /* Fill in the holes (if any) with read-only zero pages: */ - { - unsigned long addr; - - for (addr = GATE_ADDR + PAGE_SIZE; - addr < GATE_ADDR + PERCPU_PAGE_SIZE; - addr += PAGE_SIZE) - { - put_kernel_page(ZERO_PAGE(0), addr, - PAGE_READONLY); - put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE, - PAGE_READONLY); - } - } -#endif - ia64_patch_gate(); -} - -static struct vm_area_struct gate_vma; - -static int __init gate_vma_init(void) -{ - vma_init(&gate_vma, NULL); - gate_vma.vm_start = FIXADDR_USER_START; - gate_vma.vm_end = FIXADDR_USER_END; - vm_flags_init(&gate_vma, VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC); - gate_vma.vm_page_prot = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX); - - return 0; -} -__initcall(gate_vma_init); - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return &gate_vma; -} - -int in_gate_area_no_mm(unsigned long addr) -{ - if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) - return 1; - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return in_gate_area_no_mm(addr); -} - -void ia64_mmu_init(void *my_cpu_data) -{ - unsigned long pta, impl_va_bits; - extern void tlb_init(void); - -#ifdef CONFIG_DISABLE_VHPT -# define VHPT_ENABLE_BIT 0 -#else -# define VHPT_ENABLE_BIT 1 -#endif - - /* - * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped - * address space. 
The IA-64 architecture guarantees that at least 50 bits of - * virtual address space are implemented but if we pick a large enough page size - * (e.g., 64KB), the mapped address space is big enough that it will overlap with - * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages, - * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a - * problem in practice. Alternatively, we could truncate the top of the mapped - * address space to not permit mappings that would overlap with the VMLPT. - * --davidm 00/12/06 - */ -# define pte_bits 3 -# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT) - /* - * The virtual page table has to cover the entire implemented address space within - * a region even though not all of this space may be mappable. The reason for - * this is that the Access bit and Dirty bit fault handlers perform - * non-speculative accesses to the virtual page table, so the address range of the - * virtual page table itself needs to be covered by virtual page table. - */ -# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) -# define POW2(n) (1ULL << (n)) - - impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61))); - - if (impl_va_bits < 51 || impl_va_bits > 61) - panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1); - /* - * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need, - * which must fit into "vmlpt_bits - pte_bits" slots. Second half of - * the test makes sure that our mapped space doesn't overlap the - * unimplemented hole in the middle of the region. 
- */ - if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) || - (mapped_space_bits > impl_va_bits - 1)) - panic("Cannot build a big enough virtual-linear page table" - " to cover mapped address space.\n" - " Try using a smaller page size.\n"); - - - /* place the VMLPT at the end of each page-table mapped region: */ - pta = POW2(61) - POW2(vmlpt_bits); - - /* - * Set the (virtually mapped linear) page table address. Bit - * 8 selects between the short and long format, bits 2-7 the - * size of the table, and bit 0 whether the VHPT walker is - * enabled. - */ - ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT); - - ia64_tlb_init(); - -#ifdef CONFIG_HUGETLB_PAGE - ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2); - ia64_srlz_d(); -#endif -} - -int __init register_active_ranges(u64 start, u64 len, int nid) -{ - u64 end = start + len; - -#ifdef CONFIG_KEXEC - if (start > crashk_res.start && start < crashk_res.end) - start = crashk_res.end; - if (end > crashk_res.start && end < crashk_res.end) - end = crashk_res.start; -#endif - - if (start < end) - memblock_add_node(__pa(start), end - start, nid, MEMBLOCK_NONE); - return 0; -} - -int -find_max_min_low_pfn (u64 start, u64 end, void *arg) -{ - unsigned long pfn_start, pfn_end; -#ifdef CONFIG_FLATMEM - pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT; - pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT; -#else - pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT; - pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT; -#endif - min_low_pfn = min(min_low_pfn, pfn_start); - max_low_pfn = max(max_low_pfn, pfn_end); - return 0; -} - -/* - * Boot command-line option "nolwsys" can be used to disable the use of any light-weight - * system call handler. When this option is in effect, all fsyscalls will end up bubbling - * down into the kernel and calling the normal (heavy-weight) syscall handler. 
This is - * useful for performance testing, but conceivably could also come in handy for debugging - * purposes. - */ - -static int nolwsys __initdata; - -static int __init -nolwsys_setup (char *s) -{ - nolwsys = 1; - return 1; -} - -__setup("nolwsys", nolwsys_setup); - -void __init -mem_init (void) -{ - int i; - - BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE); - BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE); - BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE); - - /* - * This needs to be called _after_ the command line has been parsed but - * _before_ any drivers that may need the PCI DMA interface are - * initialized or bootmem has been freed. - */ - do { -#ifdef CONFIG_INTEL_IOMMU - detect_intel_iommu(); - if (iommu_detected) - break; -#endif - swiotlb_init(true, SWIOTLB_VERBOSE); - } while (0); - -#ifdef CONFIG_FLATMEM - BUG_ON(!mem_map); -#endif - - set_max_mapnr(max_low_pfn); - high_memory = __va(max_low_pfn * PAGE_SIZE); - memblock_free_all(); - - /* - * For fsyscall entrypoints with no light-weight handler, use the ordinary - * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry - * code can tell them apart. 
- */ - for (i = 0; i < NR_syscalls; ++i) { - extern unsigned long fsyscall_table[NR_syscalls]; - extern unsigned long sys_call_table[NR_syscalls]; - - if (!fsyscall_table[i] || nolwsys) - fsyscall_table[i] = sys_call_table[i] | 1; - } - setup_gate(); -} - -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_params *params) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - int ret; - - if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) - return -EINVAL; - - ret = __add_pages(nid, start_pfn, nr_pages, params); - if (ret) - printk("%s: Problem encountered in __add_pages() as ret=%d\n", - __func__, ret); - - return ret; -} - -void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - - __remove_pages(start_pfn, nr_pages, altmap); -} -#endif - -static const pgprot_t protection_map[16] = { - [VM_NONE] = PAGE_NONE, - [VM_READ] = PAGE_READONLY, - [VM_WRITE] = PAGE_READONLY, - [VM_WRITE | VM_READ] = PAGE_READONLY, - [VM_EXEC] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_X_RX), - [VM_EXEC | VM_READ] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_RX), - [VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC, - [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_EXEC, - [VM_SHARED] = PAGE_NONE, - [VM_SHARED | VM_READ] = PAGE_READONLY, - [VM_SHARED | VM_WRITE] = PAGE_SHARED, - [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, - [VM_SHARED | VM_EXEC] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_X_RX), - [VM_SHARED | VM_EXEC | VM_READ] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_RX), - [VM_SHARED | VM_EXEC | VM_WRITE] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_RWX), - [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | - _PAGE_AR_RWX) -}; -DECLARE_VM_GET_PAGE_PROT diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c deleted 
file mode 100644 index 711b6abc822e..000000000000 --- a/arch/ia64/mm/ioremap.c +++ /dev/null @@ -1,94 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P. - * Bjorn Helgaas - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static inline void __iomem * -__ioremap_uc(unsigned long phys_addr) -{ - return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr); -} - -void __iomem * -early_ioremap (unsigned long phys_addr, unsigned long size) -{ - u64 attr; - attr = kern_mem_attribute(phys_addr, size); - if (attr & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(phys_addr); - return __ioremap_uc(phys_addr); -} - -void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long flags) -{ - u64 attr; - unsigned long gran_base, gran_size; - unsigned long page_base; - - /* - * For things in kern_memmap, we must use the same attribute - * as the rest of the kernel. For more details, see - * Documentation/arch/ia64/aliasing.rst. - */ - attr = kern_mem_attribute(phys_addr, size); - if (attr & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(phys_addr); - else if (attr & EFI_MEMORY_UC) - return __ioremap_uc(phys_addr); - - /* - * Some chipsets don't support UC access to memory. If - * WB is supported for the whole granule, we prefer that. - */ - gran_base = GRANULEROUNDDOWN(phys_addr); - gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base; - if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(phys_addr); - - /* - * WB is not supported for the whole granule, so we can't use - * the region 7 identity mapping. If we can safely cover the - * area with kernel page table mappings, we can use those - * instead. 
- */ - page_base = phys_addr & PAGE_MASK; - size = PAGE_ALIGN(phys_addr + size) - page_base; - if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) - return generic_ioremap_prot(phys_addr, size, __pgprot(flags)); - - return __ioremap_uc(phys_addr); -} -EXPORT_SYMBOL(ioremap_prot); - -void __iomem * -ioremap_uc(unsigned long phys_addr, unsigned long size) -{ - if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) - return NULL; - - return __ioremap_uc(phys_addr); -} -EXPORT_SYMBOL(ioremap_uc); - -void -early_iounmap (volatile void __iomem *addr, unsigned long size) -{ -} - -void iounmap(volatile void __iomem *addr) -{ - if (REGION_NUMBER(addr) == RGN_GATE) - vunmap((void *) ((unsigned long) addr & PAGE_MASK)); -} -EXPORT_SYMBOL(iounmap); diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c deleted file mode 100644 index 4c7b1f50e3b7..000000000000 --- a/arch/ia64/mm/numa.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * This file contains NUMA specific variables and functions which are used on - * NUMA machines with contiguous memory. - * - * 2002/08/07 Erich Focht - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* - * The following structures are usually initialized by ACPI or - * similar mechanisms and describe the NUMA characteristics of the machine. - */ -int num_node_memblks; -struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; -struct node_cpuid_s node_cpuid[NR_CPUS] = - { [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } }; - -/* - * This is a matrix with "distances" between nodes, they should be - * proportional to the memory access latency ratios. 
- */ -u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; - -int __node_distance(int from, int to) -{ - return slit_distance(from, to); -} -EXPORT_SYMBOL(__node_distance); - -/* Identify which cnode a physical address resides on */ -int -paddr_to_nid(unsigned long paddr) -{ - int i; - - for (i = 0; i < num_node_memblks; i++) - if (paddr >= node_memblk[i].start_paddr && - paddr < node_memblk[i].start_paddr + node_memblk[i].size) - break; - - return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0); -} -EXPORT_SYMBOL(paddr_to_nid); - -#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA) -void numa_clear_node(int cpu) -{ - unmap_cpu_from_node(cpu, NUMA_NO_NODE); -} - -#ifdef CONFIG_MEMORY_HOTPLUG -/* - * SRAT information is stored in node_memblk[], then we can use SRAT - * information at memory-hot-add if necessary. - */ - -int memory_add_physaddr_to_nid(u64 addr) -{ - int nid = paddr_to_nid(addr); - if (nid < 0) - return 0; - return nid; -} -EXPORT_SYMBOL(memory_add_physaddr_to_nid); -#endif -#endif diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c deleted file mode 100644 index ca060e7a2a46..000000000000 --- a/arch/ia64/mm/tlb.c +++ /dev/null @@ -1,591 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * TLB support routines. - * - * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * 08/02/00 A. Mallick - * Modified RID allocation for SMP - * Goutham Rao - * IPI based ptc implementation and A-step IPI implementation. - * Rohit Seth - * Ken Chen - * Christophe de Dinechin : Avoid ptc.e on memory allocation - * Copyright (C) 2007 Intel Corp - * Fenghua Yu - * Add multiple ptc.g/ptc.ga instruction support in global tlb purge. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -static struct { - u64 mask; /* mask of supported purge page-sizes */ - unsigned long max_bits; /* log2 of largest supported purge page-size */ -} purge; - -struct ia64_ctx ia64_ctx = { - .lock = __SPIN_LOCK_UNLOCKED(ia64_ctx.lock), - .next = 1, - .max_ctx = ~0U -}; - -DEFINE_PER_CPU(u8, ia64_need_tlb_flush); -DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/ -DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ - -struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; - -/* - * Initializes the ia64_ctx.bitmap array based on max_ctx+1. - * Called after cpu_init() has setup ia64_ctx.max_ctx based on - * maximum RID that is supported by boot CPU. - */ -void __init -mmu_context_init (void) -{ - ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, - SMP_CACHE_BYTES); - if (!ia64_ctx.bitmap) - panic("%s: Failed to allocate %u bytes\n", __func__, - (ia64_ctx.max_ctx + 1) >> 3); - ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, - SMP_CACHE_BYTES); - if (!ia64_ctx.flushmap) - panic("%s: Failed to allocate %u bytes\n", __func__, - (ia64_ctx.max_ctx + 1) >> 3); -} - -/* - * Acquire the ia64_ctx.lock before calling this function! 
- */ -void -wrap_mmu_context (struct mm_struct *mm) -{ - int i, cpu; - unsigned long flush_bit; - - for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { - flush_bit = xchg(&ia64_ctx.flushmap[i], 0); - ia64_ctx.bitmap[i] ^= flush_bit; - } - - /* use offset at 300 to skip daemons */ - ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, - ia64_ctx.max_ctx, 300); - ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, - ia64_ctx.max_ctx, ia64_ctx.next); - - /* - * can't call flush_tlb_all() here because of race condition - * with O(1) scheduler [EF] - */ - cpu = get_cpu(); /* prevent preemption/migration */ - for_each_online_cpu(i) - if (i != cpu) - per_cpu(ia64_need_tlb_flush, i) = 1; - put_cpu(); - local_flush_tlb_all(); -} - -/* - * Implement "spinaphores" ... like counting semaphores, but they - * spin instead of sleeping. If there are ever any other users for - * this primitive it can be moved up to a spinaphore.h header. - */ -struct spinaphore { - unsigned long ticket; - unsigned long serve; -}; - -static inline void spinaphore_init(struct spinaphore *ss, int val) -{ - ss->ticket = 0; - ss->serve = val; -} - -static inline void down_spin(struct spinaphore *ss) -{ - unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve; - - if (time_before(t, ss->serve)) - return; - - ia64_invala(); - - for (;;) { - asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory"); - if (time_before(t, serve)) - return; - cpu_relax(); - } -} - -static inline void up_spin(struct spinaphore *ss) -{ - ia64_fetchadd(1, &ss->serve, rel); -} - -static struct spinaphore ptcg_sem; -static u16 nptcg = 1; -static int need_ptcg_sem = 1; -static int toolatetochangeptcgsem = 0; - -/* - * Kernel parameter "nptcg=" overrides max number of concurrent global TLB - * purges which is reported from either PAL or SAL PALO. - * - * We don't have sanity checking for nptcg value. It's the user's responsibility - * for valid nptcg value on the platform. 
Otherwise, kernel may hang in some - * cases. - */ -static int __init -set_nptcg(char *str) -{ - int value = 0; - - get_option(&str, &value); - setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER); - - return 1; -} - -__setup("nptcg=", set_nptcg); - -/* - * Maximum number of simultaneous ptc.g purges in the system can - * be defined by PAL_VM_SUMMARY (in which case we should take - * the smallest value for any cpu in the system) or by the PAL - * override table (in which case we should ignore the value from - * PAL_VM_SUMMARY). - * - * Kernel parameter "nptcg=" overrides maximum number of simultaneous ptc.g - * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case, - * we should ignore the value from either PAL_VM_SUMMARY or PAL override table. - * - * Complicating the logic here is the fact that num_possible_cpus() - * isn't fully setup until we start bringing cpus online. - */ -void -setup_ptcg_sem(int max_purges, int nptcg_from) -{ - static int kp_override; - static int palo_override; - static int firstcpu = 1; - - if (toolatetochangeptcgsem) { - if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0) - BUG_ON(1 < nptcg); - else - BUG_ON(max_purges < nptcg); - return; - } - - if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) { - kp_override = 1; - nptcg = max_purges; - goto resetsema; - } - if (kp_override) { - need_ptcg_sem = num_possible_cpus() > nptcg; - return; - } - - if (nptcg_from == NPTCG_FROM_PALO) { - palo_override = 1; - - /* In PALO max_purges == 0 really means it! */ - if (max_purges == 0) - panic("Whoa! 
Platform does not support global TLB purges.\n"); - nptcg = max_purges; - if (nptcg == PALO_MAX_TLB_PURGES) { - need_ptcg_sem = 0; - return; - } - goto resetsema; - } - if (palo_override) { - if (nptcg != PALO_MAX_TLB_PURGES) - need_ptcg_sem = (num_possible_cpus() > nptcg); - return; - } - - /* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */ - if (max_purges == 0) max_purges = 1; - - if (firstcpu) { - nptcg = max_purges; - firstcpu = 0; - } - if (max_purges < nptcg) - nptcg = max_purges; - if (nptcg == PAL_MAX_PURGES) { - need_ptcg_sem = 0; - return; - } else - need_ptcg_sem = (num_possible_cpus() > nptcg); - -resetsema: - spinaphore_init(&ptcg_sem, max_purges); -} - -#ifdef CONFIG_SMP -static void -ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, - unsigned long end, unsigned long nbits) -{ - struct mm_struct *active_mm = current->active_mm; - - toolatetochangeptcgsem = 1; - - if (mm != active_mm) { - /* Restore region IDs for mm */ - if (mm && active_mm) { - activate_context(mm); - } else { - flush_tlb_all(); - return; - } - } - - if (need_ptcg_sem) - down_spin(&ptcg_sem); - - do { - /* - * Flush ALAT entries also. 
- */ - ia64_ptcga(start, (nbits << 2)); - ia64_srlz_i(); - start += (1UL << nbits); - } while (start < end); - - if (need_ptcg_sem) - up_spin(&ptcg_sem); - - if (mm != active_mm) { - activate_context(active_mm); - } -} -#endif /* CONFIG_SMP */ - -void -local_flush_tlb_all (void) -{ - unsigned long i, j, flags, count0, count1, stride0, stride1, addr; - - addr = local_cpu_data->ptce_base; - count0 = local_cpu_data->ptce_count[0]; - count1 = local_cpu_data->ptce_count[1]; - stride0 = local_cpu_data->ptce_stride[0]; - stride1 = local_cpu_data->ptce_stride[1]; - - local_irq_save(flags); - for (i = 0; i < count0; ++i) { - for (j = 0; j < count1; ++j) { - ia64_ptce(addr); - addr += stride1; - } - addr += stride0; - } - local_irq_restore(flags); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -static void -__flush_tlb_range (struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long size = end - start; - unsigned long nbits; - -#ifndef CONFIG_SMP - if (mm != current->active_mm) { - mm->context = 0; - return; - } -#endif - - nbits = ia64_fls(size + 0xfff); - while (unlikely (((1UL << nbits) & purge.mask) == 0) && - (nbits < purge.max_bits)) - ++nbits; - if (nbits > purge.max_bits) - nbits = purge.max_bits; - start &= ~((1UL << nbits) - 1); - - preempt_disable(); -#ifdef CONFIG_SMP - if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) { - ia64_global_tlb_purge(mm, start, end, nbits); - preempt_enable(); - return; - } -#endif - do { - ia64_ptcl(start, (nbits<<2)); - start += (1UL << nbits); - } while (start < end); - preempt_enable(); - ia64_srlz_i(); /* srlz.i implies srlz.d */ -} - -void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (unlikely(end - start >= 1024*1024*1024*1024UL - || REGION_NUMBER(start) != REGION_NUMBER(end - 1))) { - /* - * If we flush more than a tera-byte or across regions, we're - * probably better off just flushing the 
entire TLB(s). This - * should be very rare and is not worth optimizing for. - */ - flush_tlb_all(); - } else { - /* flush the address range from the tlb */ - __flush_tlb_range(vma, start, end); - /* flush the virt. page-table area mapping the addr range */ - __flush_tlb_range(vma, ia64_thash(start), ia64_thash(end)); - } -} -EXPORT_SYMBOL(flush_tlb_range); - -void ia64_tlb_init(void) -{ - ia64_ptce_info_t ptce_info; - u64 tr_pgbits; - long status; - pal_vm_info_1_u_t vm_info_1; - pal_vm_info_2_u_t vm_info_2; - int cpu = smp_processor_id(); - - if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) { - printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; " - "defaulting to architected purge page-sizes.\n", status); - purge.mask = 0x115557000UL; - } - purge.max_bits = ia64_fls(purge.mask); - - ia64_get_ptce(&ptce_info); - local_cpu_data->ptce_base = ptce_info.base; - local_cpu_data->ptce_count[0] = ptce_info.count[0]; - local_cpu_data->ptce_count[1] = ptce_info.count[1]; - local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; - local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; - - local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ - status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2); - - if (status) { - printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); - per_cpu(ia64_tr_num, cpu) = 8; - return; - } - per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; - if (per_cpu(ia64_tr_num, cpu) > - (vm_info_1.pal_vm_info_1_s.max_dtr_entry+1)) - per_cpu(ia64_tr_num, cpu) = - vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; - if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) { - static int justonce = 1; - per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX; - if (justonce) { - justonce = 0; - printk(KERN_DEBUG "TR register number exceeds " - "IA64_TR_ALLOC_MAX!\n"); - } - } -} - -/* - * is_tr_overlap - * - * Check overlap with inserted TRs. 
- */ -static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size) -{ - u64 tr_log_size; - u64 tr_end; - u64 va_rr = ia64_get_rr(va); - u64 va_rid = RR_TO_RID(va_rr); - u64 va_end = va + (1<rr)) - return 0; - tr_log_size = (p->itir & 0xff) >> 2; - tr_end = p->ifa + (1< tr_end || p->ifa > va_end) - return 0; - return 1; - -} - -/* - * ia64_insert_tr in virtual mode. Allocate a TR slot - * - * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr - * - * va : virtual address. - * pte : pte entries inserted. - * log_size: range to be covered. - * - * Return value: <0 : error No. - * - * >=0 : slot number allocated for TR. - * Must be called with preemption disabled. - */ -int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) -{ - int i, r; - unsigned long psr; - struct ia64_tr_entry *p; - int cpu = smp_processor_id(); - - if (!ia64_idtrs[cpu]) { - ia64_idtrs[cpu] = kmalloc_array(2 * IA64_TR_ALLOC_MAX, - sizeof(struct ia64_tr_entry), - GFP_KERNEL); - if (!ia64_idtrs[cpu]) - return -ENOMEM; - } - r = -EINVAL; - /*Check overlap with existing TR entries*/ - if (target_mask & 0x1) { - p = ia64_idtrs[cpu]; - for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); - i++, p++) { - if (p->pte & 0x1) - if (is_tr_overlap(p, va, log_size)) { - printk(KERN_DEBUG "Overlapped Entry" - "Inserted for TR Register!!\n"); - goto out; - } - } - } - if (target_mask & 0x2) { - p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX; - for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); - i++, p++) { - if (p->pte & 0x1) - if (is_tr_overlap(p, va, log_size)) { - printk(KERN_DEBUG "Overlapped Entry" - "Inserted for TR Register!!\n"); - goto out; - } - } - } - - for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { - switch (target_mask & 0x3) { - case 1: - if (!((ia64_idtrs[cpu] + i)->pte & 0x1)) - goto found; - continue; - case 2: - if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) - goto found; - continue; - case 3: - if (!((ia64_idtrs[cpu] + i)->pte 
& 0x1) && - !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) - goto found; - continue; - default: - r = -EINVAL; - goto out; - } - } -found: - if (i >= per_cpu(ia64_tr_num, cpu)) - return -EBUSY; - - /*Record tr info for mca handler use!*/ - if (i > per_cpu(ia64_tr_used, cpu)) - per_cpu(ia64_tr_used, cpu) = i; - - psr = ia64_clear_ic(); - if (target_mask & 0x1) { - ia64_itr(0x1, i, va, pte, log_size); - ia64_srlz_i(); - p = ia64_idtrs[cpu] + i; - p->ifa = va; - p->pte = pte; - p->itir = log_size << 2; - p->rr = ia64_get_rr(va); - } - if (target_mask & 0x2) { - ia64_itr(0x2, i, va, pte, log_size); - ia64_srlz_i(); - p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i; - p->ifa = va; - p->pte = pte; - p->itir = log_size << 2; - p->rr = ia64_get_rr(va); - } - ia64_set_psr(psr); - r = i; -out: - return r; -} -EXPORT_SYMBOL_GPL(ia64_itr_entry); - -/* - * ia64_purge_tr - * - * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr. - * slot: slot number to be freed. - * - * Must be called with preemption disabled. 
- */ -void ia64_ptr_entry(u64 target_mask, int slot) -{ - int cpu = smp_processor_id(); - int i; - struct ia64_tr_entry *p; - - if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu)) - return; - - if (target_mask & 0x1) { - p = ia64_idtrs[cpu] + slot; - if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { - p->pte = 0; - ia64_ptr(0x1, p->ifa, p->itir>>2); - ia64_srlz_i(); - } - } - - if (target_mask & 0x2) { - p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot; - if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { - p->pte = 0; - ia64_ptr(0x2, p->ifa, p->itir>>2); - ia64_srlz_i(); - } - } - - for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { - if (((ia64_idtrs[cpu] + i)->pte & 0x1) || - ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) - break; - } - per_cpu(ia64_tr_used, cpu) = i; -} -EXPORT_SYMBOL_GPL(ia64_ptr_entry); diff --git a/arch/ia64/pci/Makefile b/arch/ia64/pci/Makefile deleted file mode 100644 index 81ea50eeb527..000000000000 --- a/arch/ia64/pci/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for the ia64-specific parts of the pci bus -# -obj-y := pci.o fixup.o diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c deleted file mode 100644 index 2bcdd7d3a1ad..000000000000 --- a/arch/ia64/pci/fixup.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Exceptions for specific devices. Usually work-arounds for fatal design flaws. - * Derived from fixup.c of i386 tree. - */ - -#include -#include -#include -#include -#include - -/* - * Fixup to mark boot BIOS video selected by BIOS before it changes - * - * From information provided by "Jon Smirl" - * - * The standard boot ROM sequence for an x86 machine uses the BIOS - * to select an initial video card for boot display. This boot video - * card will have its BIOS copied to 0xC0000 in system RAM. - * IORESOURCE_ROM_SHADOW is used to associate the boot video - * card with this copy. 
On laptops this copy has to be used since - * the main ROM may be compressed or combined with another image. - * See pci_map_rom() for use of this flag. Before marking the device - * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set - * by either arch code or vga-arbitration; if so only apply the fixup to this - * already-determined primary video card. - */ - -static void pci_fixup_video(struct pci_dev *pdev) -{ - struct pci_dev *bridge; - struct pci_bus *bus; - u16 config; - struct resource *res; - - if (is_uv_system()) - return; - /* Maybe, this machine supports legacy memory map. */ - - /* Is VGA routed to us? */ - bus = pdev->bus; - while (bus) { - bridge = bus->self; - - /* - * From information provided by - * "David Miller" - * The bridge control register is valid for PCI header - * type BRIDGE, or CARDBUS. Host to PCI controllers use - * PCI header type NORMAL. - */ - if (bridge && (pci_is_bridge(bridge))) { - pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, - &config); - if (!(config & PCI_BRIDGE_CTL_VGA)) - return; - } - bus = bus->parent; - } - if (!vga_default_device() || pdev == vga_default_device()) { - pci_read_config_word(pdev, PCI_COMMAND, &config); - if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { - res = &pdev->resource[PCI_ROM_RESOURCE]; - - pci_disable_rom(pdev); - if (res->parent) - release_resource(res); - - res->start = 0xC0000; - res->end = res->start + 0x20000 - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | - IORESOURCE_PCI_FIXED; - dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", - res); - } - } -} -DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c deleted file mode 100644 index 0a0328e61bef..000000000000 --- a/arch/ia64/pci/pci.c +++ /dev/null @@ -1,576 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * pci.c - Low-Level PCI Access in IA-64 - * - * Derived from bios32.c of 
i386 tree. - * - * (c) Copyright 2002, 2005 Hewlett-Packard Development Company, L.P. - * David Mosberger-Tang - * Bjorn Helgaas - * Copyright (C) 2004 Silicon Graphics, Inc. - * - * Note: Above list of copyright holders is incomplete... - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * Low-level SAL-based PCI configuration access functions. Note that SAL - * calls are already serialized (via sal_lock), so we don't need another - * synchronization mechanism here. - */ - -#define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \ - (((u64) seg << 24) | (bus << 16) | (devfn << 8) | (reg)) - -/* SAL 3.2 adds support for extended config space. */ - -#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \ - (((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg)) - -int raw_pci_read(unsigned int seg, unsigned int bus, unsigned int devfn, - int reg, int len, u32 *value) -{ - u64 addr, data = 0; - int mode, result; - - if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) - return -EINVAL; - - if ((seg | reg) <= 255) { - addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); - mode = 0; - } else if (sal_revision >= SAL_VERSION_CODE(3,2)) { - addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); - mode = 1; - } else { - return -EINVAL; - } - - result = ia64_sal_pci_config_read(addr, mode, len, &data); - if (result != 0) - return -EINVAL; - - *value = (u32) data; - return 0; -} - -int raw_pci_write(unsigned int seg, unsigned int bus, unsigned int devfn, - int reg, int len, u32 value) -{ - u64 addr; - int mode, result; - - if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) - return -EINVAL; - - if ((seg | reg) <= 255) { - addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); - mode = 0; - } else if (sal_revision >= SAL_VERSION_CODE(3,2)) { - addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); - mode = 1; - } else { - return -EINVAL; - } 
- result = ia64_sal_pci_config_write(addr, mode, len, value); - if (result != 0) - return -EINVAL; - return 0; -} - -static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, - int size, u32 *value) -{ - return raw_pci_read(pci_domain_nr(bus), bus->number, - devfn, where, size, value); -} - -static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, - int size, u32 value) -{ - return raw_pci_write(pci_domain_nr(bus), bus->number, - devfn, where, size, value); -} - -struct pci_ops pci_root_ops = { - .read = pci_read, - .write = pci_write, -}; - -struct pci_root_info { - struct acpi_pci_root_info common; - struct pci_controller controller; - struct list_head io_resources; -}; - -static unsigned int new_space(u64 phys_base, int sparse) -{ - u64 mmio_base; - int i; - - if (phys_base == 0) - return 0; /* legacy I/O port space */ - - mmio_base = (u64) ioremap(phys_base, 0); - for (i = 0; i < num_io_spaces; i++) - if (io_space[i].mmio_base == mmio_base && - io_space[i].sparse == sparse) - return i; - - if (num_io_spaces == MAX_IO_SPACES) { - pr_err("PCI: Too many IO port spaces " - "(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES); - return ~0; - } - - i = num_io_spaces++; - io_space[i].mmio_base = mmio_base; - io_space[i].sparse = sparse; - - return i; -} - -static int add_io_space(struct device *dev, struct pci_root_info *info, - struct resource_entry *entry) -{ - struct resource_entry *iospace; - struct resource *resource, *res = entry->res; - char *name; - unsigned long base, min, max, base_port; - unsigned int sparse = 0, space_nr, len; - - len = strlen(info->common.name) + 32; - iospace = resource_list_create_entry(NULL, len); - if (!iospace) { - dev_err(dev, "PCI: No memory for %s I/O port space\n", - info->common.name); - return -ENOMEM; - } - - if (res->flags & IORESOURCE_IO_SPARSE) - sparse = 1; - space_nr = new_space(entry->offset, sparse); - if (space_nr == ~0) - goto free_resource; - - name = (char *)(iospace + 1); - min = res->start - 
entry->offset; - max = res->end - entry->offset; - base = __pa(io_space[space_nr].mmio_base); - base_port = IO_SPACE_BASE(space_nr); - snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->common.name, - base_port + min, base_port + max); - - /* - * The SDM guarantees the legacy 0-64K space is sparse, but if the - * mapping is done by the processor (not the bridge), ACPI may not - * mark it as sparse. - */ - if (space_nr == 0) - sparse = 1; - - resource = iospace->res; - resource->name = name; - resource->flags = IORESOURCE_MEM; - resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min); - resource->end = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max); - if (insert_resource(&iomem_resource, resource)) { - dev_err(dev, - "can't allocate host bridge io space resource %pR\n", - resource); - goto free_resource; - } - - entry->offset = base_port; - res->start = min + base_port; - res->end = max + base_port; - resource_list_add_tail(iospace, &info->io_resources); - - return 0; - -free_resource: - resource_list_free_entry(iospace); - return -ENOSPC; -} - -/* - * An IO port or MMIO resource assigned to a PCI host bridge may be - * consumed by the host bridge itself or available to its child - * bus/devices. The ACPI specification defines a bit (Producer/Consumer) - * to tell whether the resource is consumed by the host bridge itself, - * but firmware hasn't used that bit consistently, so we can't rely on it. - * - * On x86 and IA64 platforms, all IO port and MMIO resources are assumed - * to be available to child bus/devices except one special case: - * IO port [0xCF8-0xCFF] is consumed by the host bridge itself - * to access PCI configuration space. - * - * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. 
- */ -static bool resource_is_pcicfg_ioport(struct resource *res) -{ - return (res->flags & IORESOURCE_IO) && - res->start == 0xCF8 && res->end == 0xCFF; -} - -static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) -{ - struct device *dev = &ci->bridge->dev; - struct pci_root_info *info; - struct resource *res; - struct resource_entry *entry, *tmp; - int status; - - status = acpi_pci_probe_root_resources(ci); - if (status > 0) { - info = container_of(ci, struct pci_root_info, common); - resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { - res = entry->res; - if (res->flags & IORESOURCE_MEM) { - /* - * HP's firmware has a hack to work around a - * Windows bug. Ignore these tiny memory ranges. - */ - if (resource_size(res) <= 16) { - resource_list_del(entry); - insert_resource(&iomem_resource, - entry->res); - resource_list_add_tail(entry, - &info->io_resources); - } - } else if (res->flags & IORESOURCE_IO) { - if (resource_is_pcicfg_ioport(entry->res)) - resource_list_destroy_entry(entry); - else if (add_io_space(dev, info, entry)) - resource_list_destroy_entry(entry); - } - } - } - - return status; -} - -static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci) -{ - struct pci_root_info *info; - struct resource_entry *entry, *tmp; - - info = container_of(ci, struct pci_root_info, common); - resource_list_for_each_entry_safe(entry, tmp, &info->io_resources) { - release_resource(entry->res); - resource_list_destroy_entry(entry); - } - kfree(info); -} - -static struct acpi_pci_root_ops pci_acpi_root_ops = { - .pci_ops = &pci_root_ops, - .release_info = pci_acpi_root_release_info, - .prepare_resources = pci_acpi_root_prepare_resources, -}; - -struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) -{ - struct acpi_device *device = root->device; - struct pci_root_info *info; - - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - dev_err(&device->dev, - "pci_bus %04x:%02x: ignored (out of memory)\n", - 
root->segment, (int)root->secondary.start); - return NULL; - } - - info->controller.segment = root->segment; - info->controller.companion = device; - info->controller.node = acpi_get_node(device->handle); - INIT_LIST_HEAD(&info->io_resources); - return acpi_pci_root_create(root, &pci_acpi_root_ops, - &info->common, &info->controller); -} - -int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) -{ - /* - * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL - * here, pci_create_root_bus() has been called by someone else and - * sysdata is likely to be different from what we expect. Let it go in - * that case. - */ - if (!bridge->dev.parent) { - struct pci_controller *controller = bridge->bus->sysdata; - ACPI_COMPANION_SET(&bridge->dev, controller->companion); - } - return 0; -} - -void pcibios_fixup_device_resources(struct pci_dev *dev) -{ - int idx; - - if (!dev->bus) - return; - - for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) { - struct resource *r = &dev->resource[idx]; - - if (!r->flags || r->parent || !r->start) - continue; - - pci_claim_resource(dev, idx); - } -} -EXPORT_SYMBOL_GPL(pcibios_fixup_device_resources); - -static void pcibios_fixup_bridge_resources(struct pci_dev *dev) -{ - int idx; - - if (!dev->bus) - return; - - for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { - struct resource *r = &dev->resource[idx]; - - if (!r->flags || r->parent || !r->start) - continue; - - pci_claim_bridge_resource(dev, idx); - } -} - -/* - * Called after each bus is probed, but before its children are examined. 
- */ -void pcibios_fixup_bus(struct pci_bus *b) -{ - struct pci_dev *dev; - - if (b->self) { - pci_read_bridge_bases(b); - pcibios_fixup_bridge_resources(b->self); - } - list_for_each_entry(dev, &b->devices, bus_list) - pcibios_fixup_device_resources(dev); -} - -void pcibios_add_bus(struct pci_bus *bus) -{ - acpi_pci_add_bus(bus); -} - -void pcibios_remove_bus(struct pci_bus *bus) -{ - acpi_pci_remove_bus(bus); -} - -void pcibios_set_master (struct pci_dev *dev) -{ - /* No special bus mastering setup handling */ -} - -int -pcibios_enable_device (struct pci_dev *dev, int mask) -{ - int ret; - - ret = pci_enable_resources(dev, mask); - if (ret < 0) - return ret; - - if (!pci_dev_msi_enabled(dev)) - return acpi_pci_irq_enable(dev); - return 0; -} - -void -pcibios_disable_device (struct pci_dev *dev) -{ - BUG_ON(atomic_read(&dev->enable_cnt)); - if (!pci_dev_msi_enabled(dev)) - acpi_pci_irq_disable(dev); -} - -/** - * pci_get_legacy_mem - generic legacy mem routine - * @bus: bus to get legacy memory base address for - * - * Find the base of legacy memory for @bus. This is typically the first - * megabyte of bus address space for @bus or is simply 0 on platforms whose - * chipsets support legacy I/O and memory routing. Returns the base address - * or an error pointer if an error occurred. - * - * This is the ia64 generic version of this routine. Other platforms - * are free to override it with a machine vector. - */ -char *pci_get_legacy_mem(struct pci_bus *bus) -{ - return (char *)__IA64_UNCACHED_OFFSET; -} - -/** - * pci_mmap_legacy_page_range - map legacy memory space to userland - * @bus: bus whose legacy space we're mapping - * @vma: vma passed in by mmap - * - * Map legacy memory space for this device back to userspace using a machine - * vector to get the base address. 
- */ -int -pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state) -{ - unsigned long size = vma->vm_end - vma->vm_start; - pgprot_t prot; - char *addr; - - /* We only support mmap'ing of legacy memory space */ - if (mmap_state != pci_mmap_mem) - return -ENOSYS; - - /* - * Avoid attribute aliasing. See Documentation/arch/ia64/aliasing.rst - * for more details. - */ - if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) - return -EINVAL; - prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, - vma->vm_page_prot); - - addr = pci_get_legacy_mem(bus); - if (IS_ERR(addr)) - return PTR_ERR(addr); - - vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT; - vma->vm_page_prot = prot; - - if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - size, vma->vm_page_prot)) - return -EAGAIN; - - return 0; -} - -/** - * pci_legacy_read - read from legacy I/O space - * @bus: bus to read - * @port: legacy port value - * @val: caller allocated storage for returned value - * @size: number of bytes to read - * - * Simply reads @size bytes from @port and puts the result in @val. - * - * Again, this (and the write routine) are generic versions that can be - * overridden by the platform. This is necessary on platforms that don't - * support legacy I/O routing or that hard fail on legacy I/O timeouts. - */ -int pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) -{ - int ret = size; - - switch (size) { - case 1: - *val = inb(port); - break; - case 2: - *val = inw(port); - break; - case 4: - *val = inl(port); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -/** - * pci_legacy_write - perform a legacy I/O write - * @bus: bus pointer - * @port: port to write - * @val: value to write - * @size: number of bytes to write from @val - * - * Simply writes @size bytes of @val to @port. 
- */ -int pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) -{ - int ret = size; - - switch (size) { - case 1: - outb(val, port); - break; - case 2: - outw(val, port); - break; - case 4: - outl(val, port); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -/** - * set_pci_cacheline_size - determine cacheline size for PCI devices - * - * We want to use the line-size of the outer-most cache. We assume - * that this line-size is the same for all CPUs. - * - * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info(). - */ -static void __init set_pci_dfl_cacheline_size(void) -{ - unsigned long levels, unique_caches; - long status; - pal_cache_config_info_t cci; - - status = ia64_pal_cache_summary(&levels, &unique_caches); - if (status != 0) { - pr_err("%s: ia64_pal_cache_summary() failed " - "(status=%ld)\n", __func__, status); - return; - } - - status = ia64_pal_cache_config_info(levels - 1, - /* cache_type (data_or_unified)= */ 2, &cci); - if (status != 0) { - pr_err("%s: ia64_pal_cache_config_info() failed " - "(status=%ld)\n", __func__, status); - return; - } - pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4; -} - -static int __init pcibios_init(void) -{ - set_pci_dfl_cacheline_size(); - return 0; -} - -subsys_initcall(pcibios_init); diff --git a/arch/ia64/scripts/check-gas b/arch/ia64/scripts/check-gas deleted file mode 100755 index 787cf9b6b04a..000000000000 --- a/arch/ia64/scripts/check-gas +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -dir=$(dirname $0) -CC=$1 -OBJDUMP=$2 -tmp=${TMPDIR:-/tmp} -out=$tmp/out$$.o -$CC -c $dir/check-gas-asm.S -o $out -res=$($OBJDUMP -r --section .data $out | fgrep 00004 | tr -s ' ' |cut -f3 -d' ') -rm -f $out -if [ $res != ".text" ]; then - echo buggy -else - echo good -fi -exit 0 diff --git a/arch/ia64/scripts/check-gas-asm.S b/arch/ia64/scripts/check-gas-asm.S deleted file mode 100644 index 010e1d227e5d..000000000000 --- 
a/arch/ia64/scripts/check-gas-asm.S +++ /dev/null @@ -1,2 +0,0 @@ -[1:] nop 0 - .xdata4 ".data", 0, 1b-. diff --git a/arch/ia64/scripts/check-model.c b/arch/ia64/scripts/check-model.c deleted file mode 100644 index e1d4e86e3d63..000000000000 --- a/arch/ia64/scripts/check-model.c +++ /dev/null @@ -1 +0,0 @@ -int __attribute__ ((__model__ (__small__))) x; diff --git a/arch/ia64/scripts/check-segrel.S b/arch/ia64/scripts/check-segrel.S deleted file mode 100644 index 65d6378adaaa..000000000000 --- a/arch/ia64/scripts/check-segrel.S +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .rodata - data4 @segrel(start) - .data -start: diff --git a/arch/ia64/scripts/check-segrel.lds b/arch/ia64/scripts/check-segrel.lds deleted file mode 100644 index c385d246e458..000000000000 --- a/arch/ia64/scripts/check-segrel.lds +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -SECTIONS { - . = SIZEOF_HEADERS; - .rodata : { *(.rodata) } :ro - .note : { *(.note*) } - . = 0xa0000; - .data : { *(.data) } :dat - /DISCARD/ : { *(*) } -} -PHDRS { - ro PT_LOAD FILEHDR PHDRS; - dat PT_LOAD; -} diff --git a/arch/ia64/scripts/check-serialize.S b/arch/ia64/scripts/check-serialize.S deleted file mode 100644 index 0400c106806c..000000000000 --- a/arch/ia64/scripts/check-serialize.S +++ /dev/null @@ -1,2 +0,0 @@ - .serialize.data - .serialize.instruction diff --git a/arch/ia64/scripts/check-text-align.S b/arch/ia64/scripts/check-text-align.S deleted file mode 100644 index 107fa1c88c2e..000000000000 --- a/arch/ia64/scripts/check-text-align.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .proc foo - .prologue -foo: .save rp, r2 - nop 0 - .align 64 - .endp foo diff --git a/arch/ia64/scripts/toolchain-flags b/arch/ia64/scripts/toolchain-flags deleted file mode 100755 index 12dff5c981cf..000000000000 --- a/arch/ia64/scripts/toolchain-flags +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# -# Check whether 
linker can handle cross-segment @segrel(): -# -CPPFLAGS="" -CC=$1 -OBJDUMP=$2 -READELF=$3 -dir=$(dirname $0) -tmp=${TMPDIR:-/tmp} -out=$tmp/out$$ - -# Check whether cross-segment segment-relative relocs work fine. We need -# that for building the gate DSO: - -$CC -nostdlib -static -Wl,-T$dir/check-segrel.lds $dir/check-segrel.S -o $out -res=$($OBJDUMP --full --section .rodata $out | fgrep 000 | cut -f3 -d' ') -rm -f $out -if [ $res != 00000a00 ]; then - CPPFLAGS="$CPPFLAGS -DHAVE_BUGGY_SEGREL" - cat >&2 <&1 | grep __model__ | grep -q attrib -then - CPPFLAGS="$CPPFLAGS -DHAVE_MODEL_SMALL_ATTRIBUTE" -fi -rm -f $out - -# Check whether assembler supports .serialize.{data,instruction} directive. - -$CC -c $dir/check-serialize.S -o $out 2>/dev/null -res=$? -rm -f $out -if [ $res -eq 0 ]; then - CPPFLAGS="$CPPFLAGS -DHAVE_SERIALIZE_DIRECTIVE" -fi - -echo $CPPFLAGS diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py deleted file mode 100644 index 9581742f0db2..000000000000 --- a/arch/ia64/scripts/unwcheck.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# -# Usage: unwcheck.py FILE -# -# This script checks the unwind info of each function in file FILE -# and verifies that the sum of the region-lengths matches the total -# length of the function. -# -# Based on a shell/awk script originally written by Harish Patil, -# which was converted to Perl by Matthew Chapman, which was converted -# to Python by David Mosberger. 
-# -import os -import re -import sys - -if len(sys.argv) != 2: - print("Usage: %s FILE" % sys.argv[0]) - sys.exit(2) - -readelf = os.getenv("READELF", "readelf") - -start_pattern = re.compile("<([^>]*)>: \[0x([0-9a-f]+)-0x([0-9a-f]+)\]") -rlen_pattern = re.compile(".*rlen=([0-9]+)") - -def check_func (func, slots, rlen_sum): - if slots != rlen_sum: - global num_errors - num_errors += 1 - if not func: func = "[%#x-%#x]" % (start, end) - print("ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum)) - return - -num_funcs = 0 -num_errors = 0 -func = False -slots = 0 -rlen_sum = 0 -for line in os.popen("%s -u %s" % (readelf, sys.argv[1])): - m = start_pattern.match(line) - if m: - check_func(func, slots, rlen_sum) - - func = m.group(1) - start = int(m.group(2), 16) - end = int(m.group(3), 16) - slots = 3 * (end - start) / 16 - rlen_sum = 0 - num_funcs += 1 - else: - m = rlen_pattern.match(line) - if m: - rlen_sum += int(m.group(1)) -check_func(func, slots, rlen_sum) - -if num_errors == 0: - print("No errors detected in %u functions." % num_funcs) -else: - if num_errors > 1: - err="errors" - else: - err="error" - print("%u %s detected in %u functions." % (num_errors, err, num_funcs)) - sys.exit(1) diff --git a/arch/ia64/uv/Makefile b/arch/ia64/uv/Makefile deleted file mode 100644 index aa9f91947c49..000000000000 --- a/arch/ia64/uv/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# arch/ia64/uv/Makefile -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 2008 Silicon Graphics, Inc. All Rights Reserved. 
-# -# Makefile for the sn uv subplatform -# - -obj-y += kernel/ diff --git a/arch/ia64/uv/kernel/Makefile b/arch/ia64/uv/kernel/Makefile deleted file mode 100644 index 297196578d19..000000000000 --- a/arch/ia64/uv/kernel/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# arch/ia64/uv/kernel/Makefile -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 2008 Silicon Graphics, Inc. All Rights Reserved. -# - -ccflags-y := -Iarch/ia64/sn/include - -obj-y += setup.o diff --git a/arch/ia64/uv/kernel/setup.c b/arch/ia64/uv/kernel/setup.c deleted file mode 100644 index bb025486d791..000000000000 --- a/arch/ia64/uv/kernel/setup.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * SGI UV Core Functions - * - * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -bool ia64_is_uv; -EXPORT_SYMBOL_GPL(ia64_is_uv); - -DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); - -struct redir_addr { - unsigned long redirect; - unsigned long alias; -}; - -#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT - -static __initdata struct redir_addr redir_addrs[] = { - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, -}; - -static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) -{ - union uvh_si_alias0_overlay_config_u alias; - union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; - int i; - - for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { - alias.v = uv_read_local_mmr(redir_addrs[i].alias); - if (alias.s.base == 0) { - *size = (1UL << alias.s.m_alias); - redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); - *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; - return; - } - } - BUG(); -} - -void __init uv_probe_system_type(void) -{ - struct acpi_table_rsdp *rsdp; - struct acpi_table_xsdt *xsdt; - - if (efi.acpi20 == EFI_INVALID_TABLE_ADDR) { - pr_err("ACPI 2.0 RSDP not found.\n"); - return; - } - - rsdp = (struct acpi_table_rsdp *)__va(efi.acpi20); - if (strncmp(rsdp->signature, ACPI_SIG_RSDP, sizeof(ACPI_SIG_RSDP) - 1)) { - pr_err("ACPI 2.0 RSDP signature incorrect.\n"); - return; - } - - xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_physical_address); - if (strncmp(xsdt->header.signature, ACPI_SIG_XSDT, - sizeof(ACPI_SIG_XSDT) - 1)) { - pr_err("ACPI 2.0 XSDT signature incorrect.\n"); - return; - } - - if (!strcmp(xsdt->header.oem_id, "SGI") && - !strcmp(xsdt->header.oem_table_id + 4, "UV")) - ia64_is_uv = true; -} - -void __init uv_setup(char **cmdline_p) -{ - union 
uvh_si_addr_map_config_u m_n_config; - union uvh_node_id_u node_id; - unsigned long gnode_upper; - int nid, cpu, m_val, n_val; - unsigned long mmr_base, lowmem_redir_base, lowmem_redir_size; - - get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); - node_id.v = uv_read_local_mmr(UVH_NODE_ID); - m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); - mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & - ~UV_MMR_ENABLE; - - m_val = m_n_config.s.m_skt; - n_val = m_n_config.s.n_skt; - printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); - - gnode_upper = (((unsigned long)node_id.s.node_id) & - ~((1 << n_val) - 1)) << m_val; - - for_each_present_cpu(cpu) { - nid = cpu_to_node(cpu); - uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; - uv_cpu_hub_info(cpu)->lowmem_remap_top = - lowmem_redir_base + lowmem_redir_size; - uv_cpu_hub_info(cpu)->m_val = m_val; - uv_cpu_hub_info(cpu)->n_val = n_val; - uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) -1; - uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; - uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; - uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; - uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ - printk(KERN_DEBUG "UV cpu %d, nid %d\n", cpu, nid); - } -} - diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index cee82b473dc5..554e487cbfab 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -257,7 +257,7 @@ config ACPI_CPU_FREQ_PSS config ACPI_PROCESSOR_CSTATE def_bool y depends on ACPI_PROCESSOR - depends on IA64 || X86 + depends on X86 config ACPI_PROCESSOR_IDLE bool @@ -281,9 +281,9 @@ config ACPI_CPPC_LIB config ACPI_PROCESSOR tristate "Processor" - depends on X86 || IA64 || ARM64 || LOONGARCH + depends on X86 || ARM64 || LOONGARCH select ACPI_PROCESSOR_IDLE - select ACPI_CPU_FREQ_PSS if X86 || IA64 || LOONGARCH + select ACPI_CPU_FREQ_PSS if X86 || LOONGARCH select THERMAL default y help diff --git a/drivers/acpi/numa/Kconfig 
b/drivers/acpi/numa/Kconfig index 39b1f34c21df..849c2bd820b9 100644 --- a/drivers/acpi/numa/Kconfig +++ b/drivers/acpi/numa/Kconfig @@ -2,8 +2,8 @@ config ACPI_NUMA bool "NUMA support" depends on NUMA - depends on (X86 || IA64 || ARM64 || LOONGARCH) - default y if IA64 || ARM64 + depends on (X86 || ARM64 || LOONGARCH) + default y if ARM64 config ACPI_HMAT bool "ACPI Heterogeneous Memory Attribute Table Support" diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index f725813d0cce..06289a93d3d2 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -276,7 +276,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size) return NULL; } -#if defined(CONFIG_IA64) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) +#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) /* ioremap will take care of cache attributes */ #define should_use_kmap(pfn) 0 #else diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 625af75833fc..7c8dd0abcfdf 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -348,7 +348,7 @@ config DEVPORT device is similar to /dev/mem, but for I/O ports. config HPET - bool "HPET - High Precision Event Timer" if (X86 || IA64) + bool "HPET - High Precision Event Timer" if X86 default n depends on ACPI help @@ -377,7 +377,7 @@ config HPET_MMAP_DEFAULT config HANGCHECK_TIMER tristate "Hangcheck timer" - depends on X86 || IA64 || PPC64 || S390 + depends on X86 || PPC64 || S390 help The hangcheck-timer module detects when the system has gone out to lunch past a certain margin. 
It can reboot the system diff --git a/drivers/char/Makefile b/drivers/char/Makefile index c5f532e412f1..e9b360cdc99a 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_TTY_PRINTK) += ttyprintk.o obj-y += misc.o obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o obj-$(CONFIG_VIRTIO_CONSOLE) += virtio_console.o -obj-$(CONFIG_MSPEC) += mspec.o obj-$(CONFIG_UV_MMTIMER) += uv_mmtimer.o obj-$(CONFIG_IBM_BSR) += bsr.o diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig index 4f501e4842ab..c47eb7bf06d4 100644 --- a/drivers/char/agp/Kconfig +++ b/drivers/char/agp/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 menuconfig AGP tristate "/dev/agpgart (AGP Support)" - depends on ALPHA || IA64 || PARISC || PPC || X86 + depends on ALPHA || PARISC || PPC || X86 depends on PCI help AGP (Accelerated Graphics Port) is a bus system mainly used to @@ -109,20 +109,6 @@ config AGP_VIA This option gives you AGP support for the GLX component of X on VIA MVP3/Apollo Pro chipsets. -config AGP_I460 - tristate "Intel 460GX chipset support" - depends on AGP && IA64 - help - This option gives you AGP GART support for the Intel 460GX chipset - for IA64 processors. - -config AGP_HP_ZX1 - tristate "HP ZX1 chipset AGP support" - depends on AGP && IA64 - help - This option gives you AGP GART support for the HP ZX1 chipset - for IA64 processors. 
- config AGP_PARISC tristate "HP Quicksilver AGP support" depends on AGP && PARISC && 64BIT && IOMMU_SBA diff --git a/drivers/char/agp/Makefile b/drivers/char/agp/Makefile index 90ed8c789e48..25834557e486 100644 --- a/drivers/char/agp/Makefile +++ b/drivers/char/agp/Makefile @@ -14,9 +14,7 @@ obj-$(CONFIG_AGP_AMD) += amd-k7-agp.o obj-$(CONFIG_AGP_AMD64) += amd64-agp.o obj-$(CONFIG_AGP_ALPHA_CORE) += alpha-agp.o obj-$(CONFIG_AGP_EFFICEON) += efficeon-agp.o -obj-$(CONFIG_AGP_HP_ZX1) += hp-agp.o obj-$(CONFIG_AGP_PARISC) += parisc-agp.o -obj-$(CONFIG_AGP_I460) += i460-agp.o obj-$(CONFIG_AGP_INTEL) += intel-agp.o obj-$(CONFIG_INTEL_GTT) += intel-gtt.o obj-$(CONFIG_AGP_NVIDIA) += nvidia-agp.o diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c deleted file mode 100644 index 84d9adbb62f6..000000000000 --- a/drivers/char/agp/hp-agp.c +++ /dev/null @@ -1,550 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * HP zx1 AGPGART routines. - * - * (c) Copyright 2002, 2003 Hewlett-Packard Development Company, L.P. - * Bjorn Helgaas - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "agp.h" - -#define HP_ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */ - -/* HP ZX1 IOC registers */ -#define HP_ZX1_IBASE 0x300 -#define HP_ZX1_IMASK 0x308 -#define HP_ZX1_PCOM 0x310 -#define HP_ZX1_TCNFG 0x318 -#define HP_ZX1_PDIR_BASE 0x320 - -#define HP_ZX1_IOVA_BASE GB(1UL) -#define HP_ZX1_IOVA_SIZE GB(1UL) -#define HP_ZX1_GART_SIZE (HP_ZX1_IOVA_SIZE / 2) -#define HP_ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL - -#define HP_ZX1_PDIR_VALID_BIT 0x8000000000000000UL -#define HP_ZX1_IOVA_TO_PDIR(va) ((va - hp_private.iova_base) >> hp_private.io_tlb_shift) - -#define AGP8X_MODE_BIT 3 -#define AGP8X_MODE (1 << AGP8X_MODE_BIT) - -/* AGP bridge need not be PCI device, but DRM thinks it is. 
*/ -static struct pci_dev fake_bridge_dev; - -static int hp_zx1_gart_found; - -static struct aper_size_info_fixed hp_zx1_sizes[] = -{ - {0, 0, 0}, /* filled in by hp_zx1_fetch_size() */ -}; - -static struct gatt_mask hp_zx1_masks[] = -{ - {.mask = HP_ZX1_PDIR_VALID_BIT, .type = 0} -}; - -static struct _hp_private { - volatile u8 __iomem *ioc_regs; - volatile u8 __iomem *lba_regs; - int lba_cap_offset; - u64 *io_pdir; // PDIR for entire IOVA - u64 *gatt; // PDIR just for GART (subset of above) - u64 gatt_entries; - u64 iova_base; - u64 gart_base; - u64 gart_size; - u64 io_pdir_size; - int io_pdir_owner; // do we own it, or share it with sba_iommu? - int io_page_size; - int io_tlb_shift; - int io_tlb_ps; // IOC ps config - int io_pages_per_kpage; -} hp_private; - -static int __init hp_zx1_ioc_shared(void) -{ - struct _hp_private *hp = &hp_private; - - printk(KERN_INFO PFX "HP ZX1 IOC: IOPDIR shared with sba_iommu\n"); - - /* - * IOC already configured by sba_iommu module; just use - * its setup. 
We assume: - * - IOVA space is 1Gb in size - * - first 512Mb is IOMMU, second 512Mb is GART - */ - hp->io_tlb_ps = readq(hp->ioc_regs+HP_ZX1_TCNFG); - switch (hp->io_tlb_ps) { - case 0: hp->io_tlb_shift = 12; break; - case 1: hp->io_tlb_shift = 13; break; - case 2: hp->io_tlb_shift = 14; break; - case 3: hp->io_tlb_shift = 16; break; - default: - printk(KERN_ERR PFX "Invalid IOTLB page size " - "configuration 0x%x\n", hp->io_tlb_ps); - hp->gatt = NULL; - hp->gatt_entries = 0; - return -ENODEV; - } - hp->io_page_size = 1 << hp->io_tlb_shift; - hp->io_pages_per_kpage = PAGE_SIZE / hp->io_page_size; - - hp->iova_base = readq(hp->ioc_regs+HP_ZX1_IBASE) & ~0x1; - hp->gart_base = hp->iova_base + HP_ZX1_IOVA_SIZE - HP_ZX1_GART_SIZE; - - hp->gart_size = HP_ZX1_GART_SIZE; - hp->gatt_entries = hp->gart_size / hp->io_page_size; - - hp->io_pdir = phys_to_virt(readq(hp->ioc_regs+HP_ZX1_PDIR_BASE)); - hp->gatt = &hp->io_pdir[HP_ZX1_IOVA_TO_PDIR(hp->gart_base)]; - - if (hp->gatt[0] != HP_ZX1_SBA_IOMMU_COOKIE) { - /* Normal case when no AGP device in system */ - hp->gatt = NULL; - hp->gatt_entries = 0; - printk(KERN_ERR PFX "No reserved IO PDIR entry found; " - "GART disabled\n"); - return -ENODEV; - } - - return 0; -} - -static int __init -hp_zx1_ioc_owner (void) -{ - struct _hp_private *hp = &hp_private; - - printk(KERN_INFO PFX "HP ZX1 IOC: IOPDIR dedicated to GART\n"); - - /* - * Select an IOV page size no larger than system page size. 
- */ - if (PAGE_SIZE >= KB(64)) { - hp->io_tlb_shift = 16; - hp->io_tlb_ps = 3; - } else if (PAGE_SIZE >= KB(16)) { - hp->io_tlb_shift = 14; - hp->io_tlb_ps = 2; - } else if (PAGE_SIZE >= KB(8)) { - hp->io_tlb_shift = 13; - hp->io_tlb_ps = 1; - } else { - hp->io_tlb_shift = 12; - hp->io_tlb_ps = 0; - } - hp->io_page_size = 1 << hp->io_tlb_shift; - hp->io_pages_per_kpage = PAGE_SIZE / hp->io_page_size; - - hp->iova_base = HP_ZX1_IOVA_BASE; - hp->gart_size = HP_ZX1_GART_SIZE; - hp->gart_base = hp->iova_base + HP_ZX1_IOVA_SIZE - hp->gart_size; - - hp->gatt_entries = hp->gart_size / hp->io_page_size; - hp->io_pdir_size = (HP_ZX1_IOVA_SIZE / hp->io_page_size) * sizeof(u64); - - return 0; -} - -static int __init -hp_zx1_ioc_init (u64 hpa) -{ - struct _hp_private *hp = &hp_private; - - hp->ioc_regs = ioremap(hpa, 1024); - if (!hp->ioc_regs) - return -ENOMEM; - - /* - * If the IOTLB is currently disabled, we can take it over. - * Otherwise, we have to share with sba_iommu. - */ - hp->io_pdir_owner = (readq(hp->ioc_regs+HP_ZX1_IBASE) & 0x1) == 0; - - if (hp->io_pdir_owner) - return hp_zx1_ioc_owner(); - - return hp_zx1_ioc_shared(); -} - -static int -hp_zx1_lba_find_capability (volatile u8 __iomem *hpa, int cap) -{ - u16 status; - u8 pos, id; - int ttl = 48; - - status = readw(hpa+PCI_STATUS); - if (!(status & PCI_STATUS_CAP_LIST)) - return 0; - pos = readb(hpa+PCI_CAPABILITY_LIST); - while (ttl-- && pos >= 0x40) { - pos &= ~3; - id = readb(hpa+pos+PCI_CAP_LIST_ID); - if (id == 0xff) - break; - if (id == cap) - return pos; - pos = readb(hpa+pos+PCI_CAP_LIST_NEXT); - } - return 0; -} - -static int __init -hp_zx1_lba_init (u64 hpa) -{ - struct _hp_private *hp = &hp_private; - int cap; - - hp->lba_regs = ioremap(hpa, 256); - if (!hp->lba_regs) - return -ENOMEM; - - hp->lba_cap_offset = hp_zx1_lba_find_capability(hp->lba_regs, PCI_CAP_ID_AGP); - - cap = readl(hp->lba_regs+hp->lba_cap_offset) & 0xff; - if (cap != PCI_CAP_ID_AGP) { - printk(KERN_ERR PFX "Invalid capability ID 
0x%02x at 0x%x\n", - cap, hp->lba_cap_offset); - iounmap(hp->lba_regs); - return -ENODEV; - } - - return 0; -} - -static int -hp_zx1_fetch_size(void) -{ - int size; - - size = hp_private.gart_size / MB(1); - hp_zx1_sizes[0].size = size; - agp_bridge->current_size = (void *) &hp_zx1_sizes[0]; - return size; -} - -static int -hp_zx1_configure (void) -{ - struct _hp_private *hp = &hp_private; - - agp_bridge->gart_bus_addr = hp->gart_base; - agp_bridge->capndx = hp->lba_cap_offset; - agp_bridge->mode = readl(hp->lba_regs+hp->lba_cap_offset+PCI_AGP_STATUS); - - if (hp->io_pdir_owner) { - writel(virt_to_phys(hp->io_pdir), hp->ioc_regs+HP_ZX1_PDIR_BASE); - readl(hp->ioc_regs+HP_ZX1_PDIR_BASE); - writel(hp->io_tlb_ps, hp->ioc_regs+HP_ZX1_TCNFG); - readl(hp->ioc_regs+HP_ZX1_TCNFG); - writel((unsigned int)(~(HP_ZX1_IOVA_SIZE-1)), hp->ioc_regs+HP_ZX1_IMASK); - readl(hp->ioc_regs+HP_ZX1_IMASK); - writel(hp->iova_base|1, hp->ioc_regs+HP_ZX1_IBASE); - readl(hp->ioc_regs+HP_ZX1_IBASE); - writel(hp->iova_base|ilog2(HP_ZX1_IOVA_SIZE), hp->ioc_regs+HP_ZX1_PCOM); - readl(hp->ioc_regs+HP_ZX1_PCOM); - } - - return 0; -} - -static void -hp_zx1_cleanup (void) -{ - struct _hp_private *hp = &hp_private; - - if (hp->ioc_regs) { - if (hp->io_pdir_owner) { - writeq(0, hp->ioc_regs+HP_ZX1_IBASE); - readq(hp->ioc_regs+HP_ZX1_IBASE); - } - iounmap(hp->ioc_regs); - } - if (hp->lba_regs) - iounmap(hp->lba_regs); -} - -static void -hp_zx1_tlbflush (struct agp_memory *mem) -{ - struct _hp_private *hp = &hp_private; - - writeq(hp->gart_base | ilog2(hp->gart_size), hp->ioc_regs+HP_ZX1_PCOM); - readq(hp->ioc_regs+HP_ZX1_PCOM); -} - -static int -hp_zx1_create_gatt_table (struct agp_bridge_data *bridge) -{ - struct _hp_private *hp = &hp_private; - int i; - - if (hp->io_pdir_owner) { - hp->io_pdir = (u64 *) __get_free_pages(GFP_KERNEL, - get_order(hp->io_pdir_size)); - if (!hp->io_pdir) { - printk(KERN_ERR PFX "Couldn't allocate contiguous " - "memory for I/O PDIR\n"); - hp->gatt = NULL; - 
hp->gatt_entries = 0; - return -ENOMEM; - } - memset(hp->io_pdir, 0, hp->io_pdir_size); - - hp->gatt = &hp->io_pdir[HP_ZX1_IOVA_TO_PDIR(hp->gart_base)]; - } - - for (i = 0; i < hp->gatt_entries; i++) { - hp->gatt[i] = (unsigned long) agp_bridge->scratch_page; - } - - return 0; -} - -static int -hp_zx1_free_gatt_table (struct agp_bridge_data *bridge) -{ - struct _hp_private *hp = &hp_private; - - if (hp->io_pdir_owner) - free_pages((unsigned long) hp->io_pdir, - get_order(hp->io_pdir_size)); - else - hp->gatt[0] = HP_ZX1_SBA_IOMMU_COOKIE; - return 0; -} - -static int -hp_zx1_insert_memory (struct agp_memory *mem, off_t pg_start, int type) -{ - struct _hp_private *hp = &hp_private; - int i, k; - off_t j, io_pg_start; - int io_pg_count; - - if (type != mem->type || - agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) { - return -EINVAL; - } - - io_pg_start = hp->io_pages_per_kpage * pg_start; - io_pg_count = hp->io_pages_per_kpage * mem->page_count; - if ((io_pg_start + io_pg_count) > hp->gatt_entries) { - return -EINVAL; - } - - j = io_pg_start; - while (j < (io_pg_start + io_pg_count)) { - if (hp->gatt[j]) { - return -EBUSY; - } - j++; - } - - if (!mem->is_flushed) { - global_cache_flush(); - mem->is_flushed = true; - } - - for (i = 0, j = io_pg_start; i < mem->page_count; i++) { - unsigned long paddr; - - paddr = page_to_phys(mem->pages[i]); - for (k = 0; - k < hp->io_pages_per_kpage; - k++, j++, paddr += hp->io_page_size) { - hp->gatt[j] = HP_ZX1_PDIR_VALID_BIT | paddr; - } - } - - agp_bridge->driver->tlb_flush(mem); - return 0; -} - -static int -hp_zx1_remove_memory (struct agp_memory *mem, off_t pg_start, int type) -{ - struct _hp_private *hp = &hp_private; - int i, io_pg_start, io_pg_count; - - if (type != mem->type || - agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) { - return -EINVAL; - } - - io_pg_start = hp->io_pages_per_kpage * pg_start; - io_pg_count = hp->io_pages_per_kpage * mem->page_count; - for (i = io_pg_start; i < 
io_pg_count + io_pg_start; i++) { - hp->gatt[i] = agp_bridge->scratch_page; - } - - agp_bridge->driver->tlb_flush(mem); - return 0; -} - -static unsigned long -hp_zx1_mask_memory (struct agp_bridge_data *bridge, dma_addr_t addr, int type) -{ - return HP_ZX1_PDIR_VALID_BIT | addr; -} - -static void -hp_zx1_enable (struct agp_bridge_data *bridge, u32 mode) -{ - struct _hp_private *hp = &hp_private; - u32 command; - - command = readl(hp->lba_regs+hp->lba_cap_offset+PCI_AGP_STATUS); - command = agp_collect_device_status(bridge, mode, command); - command |= 0x00000100; - - writel(command, hp->lba_regs+hp->lba_cap_offset+PCI_AGP_COMMAND); - - agp_device_command(command, (mode & AGP8X_MODE) != 0); -} - -const struct agp_bridge_driver hp_zx1_driver = { - .owner = THIS_MODULE, - .size_type = FIXED_APER_SIZE, - .configure = hp_zx1_configure, - .fetch_size = hp_zx1_fetch_size, - .cleanup = hp_zx1_cleanup, - .tlb_flush = hp_zx1_tlbflush, - .mask_memory = hp_zx1_mask_memory, - .masks = hp_zx1_masks, - .agp_enable = hp_zx1_enable, - .cache_flush = global_cache_flush, - .create_gatt_table = hp_zx1_create_gatt_table, - .free_gatt_table = hp_zx1_free_gatt_table, - .insert_memory = hp_zx1_insert_memory, - .remove_memory = hp_zx1_remove_memory, - .alloc_by_type = agp_generic_alloc_by_type, - .free_by_type = agp_generic_free_by_type, - .agp_alloc_page = agp_generic_alloc_page, - .agp_alloc_pages = agp_generic_alloc_pages, - .agp_destroy_page = agp_generic_destroy_page, - .agp_destroy_pages = agp_generic_destroy_pages, - .agp_type_to_mask_type = agp_generic_type_to_mask_type, - .cant_use_aperture = true, -}; - -static int __init -hp_zx1_setup (u64 ioc_hpa, u64 lba_hpa) -{ - struct agp_bridge_data *bridge; - int error = 0; - - error = hp_zx1_ioc_init(ioc_hpa); - if (error) - goto fail; - - error = hp_zx1_lba_init(lba_hpa); - if (error) - goto fail; - - bridge = agp_alloc_bridge(); - if (!bridge) { - error = -ENOMEM; - goto fail; - } - bridge->driver = &hp_zx1_driver; - - 
fake_bridge_dev.vendor = PCI_VENDOR_ID_HP; - fake_bridge_dev.device = PCI_DEVICE_ID_HP_PCIX_LBA; - bridge->dev = &fake_bridge_dev; - - error = agp_add_bridge(bridge); - fail: - if (error) - hp_zx1_cleanup(); - return error; -} - -static acpi_status __init -zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret) -{ - acpi_handle handle, parent; - acpi_status status; - struct acpi_device_info *info; - u64 lba_hpa, sba_hpa, length; - int match; - - status = hp_acpi_csr_space(obj, &lba_hpa, &length); - if (ACPI_FAILURE(status)) - return AE_OK; /* keep looking for another bridge */ - - /* Look for an enclosing IOC scope and find its CSR space */ - handle = obj; - do { - status = acpi_get_object_info(handle, &info); - if (ACPI_SUCCESS(status) && (info->valid & ACPI_VALID_HID)) { - /* TBD check _CID also */ - match = (strcmp(info->hardware_id.string, "HWP0001") == 0); - kfree(info); - if (match) { - status = hp_acpi_csr_space(handle, &sba_hpa, &length); - if (ACPI_SUCCESS(status)) - break; - else { - printk(KERN_ERR PFX "Detected HP ZX1 " - "AGP LBA but no IOC.\n"); - return AE_OK; - } - } - } - - status = acpi_get_parent(handle, &parent); - handle = parent; - } while (ACPI_SUCCESS(status)); - - if (ACPI_FAILURE(status)) - return AE_OK; /* found no enclosing IOC */ - - if (hp_zx1_setup(sba_hpa + HP_ZX1_IOC_OFFSET, lba_hpa)) - return AE_OK; - - printk(KERN_INFO PFX "Detected HP ZX1 %s AGP chipset " - "(ioc=%llx, lba=%llx)\n", (char *)context, - sba_hpa + HP_ZX1_IOC_OFFSET, lba_hpa); - - hp_zx1_gart_found = 1; - return AE_CTRL_TERMINATE; /* we only support one bridge; quit looking */ -} - -static int __init -agp_hp_init (void) -{ - if (agp_off) - return -EINVAL; - - acpi_get_devices("HWP0003", zx1_gart_probe, "HWP0003", NULL); - if (hp_zx1_gart_found) - return 0; - - acpi_get_devices("HWP0007", zx1_gart_probe, "HWP0007", NULL); - if (hp_zx1_gart_found) - return 0; - - return -ENODEV; -} - -static void __exit -agp_hp_cleanup (void) -{ -} - 
-module_init(agp_hp_init); -module_exit(agp_hp_cleanup); - -MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/char/agp/i460-agp.c b/drivers/char/agp/i460-agp.c deleted file mode 100644 index 15b240ea4848..000000000000 --- a/drivers/char/agp/i460-agp.c +++ /dev/null @@ -1,659 +0,0 @@ -/* - * For documentation on the i460 AGP interface, see Chapter 7 (AGP Subsystem) of - * the "Intel 460GTX Chipset Software Developer's Manual": - * http://www.intel.com/design/archives/itanium/downloads/248704.htm - */ -/* - * 460GX support by Chris Ahna - * Clean up & simplification by David Mosberger-Tang - */ -#include -#include -#include -#include -#include -#include -#include - -#include "agp.h" - -#define INTEL_I460_BAPBASE 0x98 -#define INTEL_I460_GXBCTL 0xa0 -#define INTEL_I460_AGPSIZ 0xa2 -#define INTEL_I460_ATTBASE 0xfe200000 -#define INTEL_I460_GATT_VALID (1UL << 24) -#define INTEL_I460_GATT_COHERENT (1UL << 25) - -/* - * The i460 can operate with large (4MB) pages, but there is no sane way to support this - * within the current kernel/DRM environment, so we disable the relevant code for now. - * See also comments in ia64_alloc_page()... - */ -#define I460_LARGE_IO_PAGES 0 - -#if I460_LARGE_IO_PAGES -# define I460_IO_PAGE_SHIFT i460.io_page_shift -#else -# define I460_IO_PAGE_SHIFT 12 -#endif - -#define I460_IOPAGES_PER_KPAGE (PAGE_SIZE >> I460_IO_PAGE_SHIFT) -#define I460_KPAGES_PER_IOPAGE (1 << (I460_IO_PAGE_SHIFT - PAGE_SHIFT)) -#define I460_SRAM_IO_DISABLE (1 << 4) -#define I460_BAPBASE_ENABLE (1 << 3) -#define I460_AGPSIZ_MASK 0x7 -#define I460_4M_PS (1 << 1) - -/* Control bits for Out-Of-GART coherency and Burst Write Combining */ -#define I460_GXBCTL_OOG (1UL << 0) -#define I460_GXBCTL_BWC (1UL << 2) - -/* - * gatt_table entries are 32-bits wide on the i460; the generic code ought to declare the - * gatt_table and gatt_table_real pointers a "void *"... 
- */ -#define RD_GATT(index) readl((u32 *) i460.gatt + (index)) -#define WR_GATT(index, val) writel((val), (u32 *) i460.gatt + (index)) -/* - * The 460 spec says we have to read the last location written to make sure that all - * writes have taken effect - */ -#define WR_FLUSH_GATT(index) RD_GATT(index) - -static unsigned long i460_mask_memory (struct agp_bridge_data *bridge, - dma_addr_t addr, int type); - -static struct { - void *gatt; /* ioremap'd GATT area */ - - /* i460 supports multiple GART page sizes, so GART pageshift is dynamic: */ - u8 io_page_shift; - - /* BIOS configures chipset to one of 2 possible apbase values: */ - u8 dynamic_apbase; - - /* structure for tracking partial use of 4MB GART pages: */ - struct lp_desc { - unsigned long *alloced_map; /* bitmap of kernel-pages in use */ - int refcount; /* number of kernel pages using the large page */ - u64 paddr; /* physical address of large page */ - struct page *page; /* page pointer */ - } *lp_desc; -} i460; - -static const struct aper_size_info_8 i460_sizes[3] = -{ - /* - * The 32GB aperture is only available with a 4M GART page size. Due to the - * dynamic GART page size, we can't figure out page_order or num_entries until - * runtime. - */ - {32768, 0, 0, 4}, - {1024, 0, 0, 2}, - {256, 0, 0, 1} -}; - -static struct gatt_mask i460_masks[] = -{ - { - .mask = INTEL_I460_GATT_VALID | INTEL_I460_GATT_COHERENT, - .type = 0 - } -}; - -static int i460_fetch_size (void) -{ - int i; - u8 temp; - struct aper_size_info_8 *values; - - /* Determine the GART page size */ - pci_read_config_byte(agp_bridge->dev, INTEL_I460_GXBCTL, &temp); - i460.io_page_shift = (temp & I460_4M_PS) ? 
22 : 12; - pr_debug("i460_fetch_size: io_page_shift=%d\n", i460.io_page_shift); - - if (i460.io_page_shift != I460_IO_PAGE_SHIFT) { - printk(KERN_ERR PFX - "I/O (GART) page-size %luKB doesn't match expected " - "size %luKB\n", - 1UL << (i460.io_page_shift - 10), - 1UL << (I460_IO_PAGE_SHIFT)); - return 0; - } - - values = A_SIZE_8(agp_bridge->driver->aperture_sizes); - - pci_read_config_byte(agp_bridge->dev, INTEL_I460_AGPSIZ, &temp); - - /* Exit now if the IO drivers for the GART SRAMS are turned off */ - if (temp & I460_SRAM_IO_DISABLE) { - printk(KERN_ERR PFX "GART SRAMS disabled on 460GX chipset\n"); - printk(KERN_ERR PFX "AGPGART operation not possible\n"); - return 0; - } - - /* Make sure we don't try to create an 2 ^ 23 entry GATT */ - if ((i460.io_page_shift == 0) && ((temp & I460_AGPSIZ_MASK) == 4)) { - printk(KERN_ERR PFX "We can't have a 32GB aperture with 4KB GART pages\n"); - return 0; - } - - /* Determine the proper APBASE register */ - if (temp & I460_BAPBASE_ENABLE) - i460.dynamic_apbase = INTEL_I460_BAPBASE; - else - i460.dynamic_apbase = AGP_APBASE; - - for (i = 0; i < agp_bridge->driver->num_aperture_sizes; i++) { - /* - * Dynamically calculate the proper num_entries and page_order values for - * the define aperture sizes. Take care not to shift off the end of - * values[i].size. - */ - values[i].num_entries = (values[i].size << 8) >> (I460_IO_PAGE_SHIFT - 12); - values[i].page_order = ilog2((sizeof(u32)*values[i].num_entries) >> PAGE_SHIFT); - } - - for (i = 0; i < agp_bridge->driver->num_aperture_sizes; i++) { - /* Neglect control bits when matching up size_value */ - if ((temp & I460_AGPSIZ_MASK) == values[i].size_value) { - agp_bridge->previous_size = agp_bridge->current_size = (void *) (values + i); - agp_bridge->aperture_size_idx = i; - return values[i].size; - } - } - - return 0; -} - -/* There isn't anything to do here since 460 has no GART TLB. 
*/ -static void i460_tlb_flush (struct agp_memory *mem) -{ - return; -} - -/* - * This utility function is needed to prevent corruption of the control bits - * which are stored along with the aperture size in 460's AGPSIZ register - */ -static void i460_write_agpsiz (u8 size_value) -{ - u8 temp; - - pci_read_config_byte(agp_bridge->dev, INTEL_I460_AGPSIZ, &temp); - pci_write_config_byte(agp_bridge->dev, INTEL_I460_AGPSIZ, - ((temp & ~I460_AGPSIZ_MASK) | size_value)); -} - -static void i460_cleanup (void) -{ - struct aper_size_info_8 *previous_size; - - previous_size = A_SIZE_8(agp_bridge->previous_size); - i460_write_agpsiz(previous_size->size_value); - - if (I460_IO_PAGE_SHIFT > PAGE_SHIFT) - kfree(i460.lp_desc); -} - -static int i460_configure (void) -{ - union { - u32 small[2]; - u64 large; - } temp; - size_t size; - u8 scratch; - struct aper_size_info_8 *current_size; - - temp.large = 0; - - current_size = A_SIZE_8(agp_bridge->current_size); - i460_write_agpsiz(current_size->size_value); - - /* - * Do the necessary rigmarole to read all eight bytes of APBASE. - * This has to be done since the AGP aperture can be above 4GB on - * 460 based systems. - */ - pci_read_config_dword(agp_bridge->dev, i460.dynamic_apbase, &(temp.small[0])); - pci_read_config_dword(agp_bridge->dev, i460.dynamic_apbase + 4, &(temp.small[1])); - - /* Clear BAR control bits */ - agp_bridge->gart_bus_addr = temp.large & ~((1UL << 3) - 1); - - pci_read_config_byte(agp_bridge->dev, INTEL_I460_GXBCTL, &scratch); - pci_write_config_byte(agp_bridge->dev, INTEL_I460_GXBCTL, - (scratch & 0x02) | I460_GXBCTL_OOG | I460_GXBCTL_BWC); - - /* - * Initialize partial allocation trackers if a GART page is bigger than a kernel - * page. 
- */ - if (I460_IO_PAGE_SHIFT > PAGE_SHIFT) { - size = current_size->num_entries * sizeof(i460.lp_desc[0]); - i460.lp_desc = kzalloc(size, GFP_KERNEL); - if (!i460.lp_desc) - return -ENOMEM; - } - return 0; -} - -static int i460_create_gatt_table (struct agp_bridge_data *bridge) -{ - int page_order, num_entries, i; - void *temp; - - /* - * Load up the fixed address of the GART SRAMS which hold our GATT table. - */ - temp = agp_bridge->current_size; - page_order = A_SIZE_8(temp)->page_order; - num_entries = A_SIZE_8(temp)->num_entries; - - i460.gatt = ioremap(INTEL_I460_ATTBASE, PAGE_SIZE << page_order); - if (!i460.gatt) { - printk(KERN_ERR PFX "ioremap failed\n"); - return -ENOMEM; - } - - /* These are no good, the should be removed from the agp_bridge strucure... */ - agp_bridge->gatt_table_real = NULL; - agp_bridge->gatt_table = NULL; - agp_bridge->gatt_bus_addr = 0; - - for (i = 0; i < num_entries; ++i) - WR_GATT(i, 0); - WR_FLUSH_GATT(i - 1); - return 0; -} - -static int i460_free_gatt_table (struct agp_bridge_data *bridge) -{ - int num_entries, i; - void *temp; - - temp = agp_bridge->current_size; - - num_entries = A_SIZE_8(temp)->num_entries; - - for (i = 0; i < num_entries; ++i) - WR_GATT(i, 0); - WR_FLUSH_GATT(num_entries - 1); - - iounmap(i460.gatt); - return 0; -} - -/* - * The following functions are called when the I/O (GART) page size is smaller than - * PAGE_SIZE. 
- */ - -static int i460_insert_memory_small_io_page (struct agp_memory *mem, - off_t pg_start, int type) -{ - unsigned long paddr, io_pg_start, io_page_size; - int i, j, k, num_entries; - void *temp; - - pr_debug("i460_insert_memory_small_io_page(mem=%p, pg_start=%ld, type=%d, paddr0=0x%lx)\n", - mem, pg_start, type, page_to_phys(mem->pages[0])); - - if (type >= AGP_USER_TYPES || mem->type >= AGP_USER_TYPES) - return -EINVAL; - - io_pg_start = I460_IOPAGES_PER_KPAGE * pg_start; - - temp = agp_bridge->current_size; - num_entries = A_SIZE_8(temp)->num_entries; - - if ((io_pg_start + I460_IOPAGES_PER_KPAGE * mem->page_count) > num_entries) { - printk(KERN_ERR PFX "Looks like we're out of AGP memory\n"); - return -EINVAL; - } - - j = io_pg_start; - while (j < (io_pg_start + I460_IOPAGES_PER_KPAGE * mem->page_count)) { - if (!PGE_EMPTY(agp_bridge, RD_GATT(j))) { - pr_debug("i460_insert_memory_small_io_page: GATT[%d]=0x%x is busy\n", - j, RD_GATT(j)); - return -EBUSY; - } - j++; - } - - io_page_size = 1UL << I460_IO_PAGE_SHIFT; - for (i = 0, j = io_pg_start; i < mem->page_count; i++) { - paddr = page_to_phys(mem->pages[i]); - for (k = 0; k < I460_IOPAGES_PER_KPAGE; k++, j++, paddr += io_page_size) - WR_GATT(j, i460_mask_memory(agp_bridge, paddr, mem->type)); - } - WR_FLUSH_GATT(j - 1); - return 0; -} - -static int i460_remove_memory_small_io_page(struct agp_memory *mem, - off_t pg_start, int type) -{ - int i; - - pr_debug("i460_remove_memory_small_io_page(mem=%p, pg_start=%ld, type=%d)\n", - mem, pg_start, type); - - pg_start = I460_IOPAGES_PER_KPAGE * pg_start; - - for (i = pg_start; i < (pg_start + I460_IOPAGES_PER_KPAGE * mem->page_count); i++) - WR_GATT(i, 0); - WR_FLUSH_GATT(i - 1); - return 0; -} - -#if I460_LARGE_IO_PAGES - -/* - * These functions are called when the I/O (GART) page size exceeds PAGE_SIZE. - * - * This situation is interesting since AGP memory allocations that are smaller than a - * single GART page are possible. 
The i460.lp_desc array tracks partial allocation of the - * large GART pages to work around this issue. - * - * i460.lp_desc[pg_num].refcount tracks the number of kernel pages in use within GART page - * pg_num. i460.lp_desc[pg_num].paddr is the physical address of the large page and - * i460.lp_desc[pg_num].alloced_map is a bitmap of kernel pages that are in use (allocated). - */ - -static int i460_alloc_large_page (struct lp_desc *lp) -{ - unsigned long order = I460_IO_PAGE_SHIFT - PAGE_SHIFT; - size_t map_size; - - lp->page = alloc_pages(GFP_KERNEL, order); - if (!lp->page) { - printk(KERN_ERR PFX "Couldn't alloc 4M GART page...\n"); - return -ENOMEM; - } - - map_size = ((I460_KPAGES_PER_IOPAGE + BITS_PER_LONG - 1) & -BITS_PER_LONG)/8; - lp->alloced_map = kzalloc(map_size, GFP_KERNEL); - if (!lp->alloced_map) { - __free_pages(lp->page, order); - printk(KERN_ERR PFX "Out of memory, we're in trouble...\n"); - return -ENOMEM; - } - - lp->paddr = page_to_phys(lp->page); - lp->refcount = 0; - atomic_add(I460_KPAGES_PER_IOPAGE, &agp_bridge->current_memory_agp); - return 0; -} - -static void i460_free_large_page (struct lp_desc *lp) -{ - kfree(lp->alloced_map); - lp->alloced_map = NULL; - - __free_pages(lp->page, I460_IO_PAGE_SHIFT - PAGE_SHIFT); - atomic_sub(I460_KPAGES_PER_IOPAGE, &agp_bridge->current_memory_agp); -} - -static int i460_insert_memory_large_io_page (struct agp_memory *mem, - off_t pg_start, int type) -{ - int i, start_offset, end_offset, idx, pg, num_entries; - struct lp_desc *start, *end, *lp; - void *temp; - - if (type >= AGP_USER_TYPES || mem->type >= AGP_USER_TYPES) - return -EINVAL; - - temp = agp_bridge->current_size; - num_entries = A_SIZE_8(temp)->num_entries; - - /* Figure out what pg_start means in terms of our large GART pages */ - start = &i460.lp_desc[pg_start / I460_KPAGES_PER_IOPAGE]; - end = &i460.lp_desc[(pg_start + mem->page_count - 1) / I460_KPAGES_PER_IOPAGE]; - start_offset = pg_start % I460_KPAGES_PER_IOPAGE; - end_offset = 
(pg_start + mem->page_count - 1) % I460_KPAGES_PER_IOPAGE; - - if (end > i460.lp_desc + num_entries) { - printk(KERN_ERR PFX "Looks like we're out of AGP memory\n"); - return -EINVAL; - } - - /* Check if the requested region of the aperture is free */ - for (lp = start; lp <= end; ++lp) { - if (!lp->alloced_map) - continue; /* OK, the entire large page is available... */ - - for (idx = ((lp == start) ? start_offset : 0); - idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE); - idx++) - { - if (test_bit(idx, lp->alloced_map)) - return -EBUSY; - } - } - - for (lp = start, i = 0; lp <= end; ++lp) { - if (!lp->alloced_map) { - /* Allocate new GART pages... */ - if (i460_alloc_large_page(lp) < 0) - return -ENOMEM; - pg = lp - i460.lp_desc; - WR_GATT(pg, i460_mask_memory(agp_bridge, - lp->paddr, 0)); - WR_FLUSH_GATT(pg); - } - - for (idx = ((lp == start) ? start_offset : 0); - idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE); - idx++, i++) - { - mem->pages[i] = lp->page; - __set_bit(idx, lp->alloced_map); - ++lp->refcount; - } - } - return 0; -} - -static int i460_remove_memory_large_io_page (struct agp_memory *mem, - off_t pg_start, int type) -{ - int i, pg, start_offset, end_offset, idx, num_entries; - struct lp_desc *start, *end, *lp; - void *temp; - - temp = agp_bridge->current_size; - num_entries = A_SIZE_8(temp)->num_entries; - - /* Figure out what pg_start means in terms of our large GART pages */ - start = &i460.lp_desc[pg_start / I460_KPAGES_PER_IOPAGE]; - end = &i460.lp_desc[(pg_start + mem->page_count - 1) / I460_KPAGES_PER_IOPAGE]; - start_offset = pg_start % I460_KPAGES_PER_IOPAGE; - end_offset = (pg_start + mem->page_count - 1) % I460_KPAGES_PER_IOPAGE; - - for (i = 0, lp = start; lp <= end; ++lp) { - for (idx = ((lp == start) ? start_offset : 0); - idx < ((lp == end) ? 
(end_offset + 1) : I460_KPAGES_PER_IOPAGE); - idx++, i++) - { - mem->pages[i] = NULL; - __clear_bit(idx, lp->alloced_map); - --lp->refcount; - } - - /* Free GART pages if they are unused */ - if (lp->refcount == 0) { - pg = lp - i460.lp_desc; - WR_GATT(pg, 0); - WR_FLUSH_GATT(pg); - i460_free_large_page(lp); - } - } - return 0; -} - -/* Wrapper routines to call the approriate {small_io_page,large_io_page} function */ - -static int i460_insert_memory (struct agp_memory *mem, - off_t pg_start, int type) -{ - if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) - return i460_insert_memory_small_io_page(mem, pg_start, type); - else - return i460_insert_memory_large_io_page(mem, pg_start, type); -} - -static int i460_remove_memory (struct agp_memory *mem, - off_t pg_start, int type) -{ - if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) - return i460_remove_memory_small_io_page(mem, pg_start, type); - else - return i460_remove_memory_large_io_page(mem, pg_start, type); -} - -/* - * If the I/O (GART) page size is bigger than the kernel page size, we don't want to - * allocate memory until we know where it is to be bound in the aperture (a - * multi-kernel-page alloc might fit inside of an already allocated GART page). - * - * Let's just hope nobody counts on the allocated AGP memory being there before bind time - * (I don't think current drivers do)... - */ -static struct page *i460_alloc_page (struct agp_bridge_data *bridge) -{ - void *page; - - if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) { - page = agp_generic_alloc_page(agp_bridge); - } else - /* Returning NULL would cause problems */ - /* AK: really dubious code. 
*/ - page = (void *)~0UL; - return page; -} - -static void i460_destroy_page (struct page *page, int flags) -{ - if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) { - agp_generic_destroy_page(page, flags); - } -} - -#endif /* I460_LARGE_IO_PAGES */ - -static unsigned long i460_mask_memory (struct agp_bridge_data *bridge, - dma_addr_t addr, int type) -{ - /* Make sure the returned address is a valid GATT entry */ - return bridge->driver->masks[0].mask - | (((addr & ~((1 << I460_IO_PAGE_SHIFT) - 1)) & 0xfffff000) >> 12); -} - -const struct agp_bridge_driver intel_i460_driver = { - .owner = THIS_MODULE, - .aperture_sizes = i460_sizes, - .size_type = U8_APER_SIZE, - .num_aperture_sizes = 3, - .configure = i460_configure, - .fetch_size = i460_fetch_size, - .cleanup = i460_cleanup, - .tlb_flush = i460_tlb_flush, - .mask_memory = i460_mask_memory, - .masks = i460_masks, - .agp_enable = agp_generic_enable, - .cache_flush = global_cache_flush, - .create_gatt_table = i460_create_gatt_table, - .free_gatt_table = i460_free_gatt_table, -#if I460_LARGE_IO_PAGES - .insert_memory = i460_insert_memory, - .remove_memory = i460_remove_memory, - .agp_alloc_page = i460_alloc_page, - .agp_destroy_page = i460_destroy_page, -#else - .insert_memory = i460_insert_memory_small_io_page, - .remove_memory = i460_remove_memory_small_io_page, - .agp_alloc_page = agp_generic_alloc_page, - .agp_alloc_pages = agp_generic_alloc_pages, - .agp_destroy_page = agp_generic_destroy_page, - .agp_destroy_pages = agp_generic_destroy_pages, -#endif - .alloc_by_type = agp_generic_alloc_by_type, - .free_by_type = agp_generic_free_by_type, - .agp_type_to_mask_type = agp_generic_type_to_mask_type, - .cant_use_aperture = true, -}; - -static int agp_intel_i460_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - struct agp_bridge_data *bridge; - u8 cap_ptr; - - cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP); - if (!cap_ptr) - return -ENODEV; - - bridge = agp_alloc_bridge(); - if (!bridge) - return -ENOMEM; - 
- bridge->driver = &intel_i460_driver; - bridge->dev = pdev; - bridge->capndx = cap_ptr; - - printk(KERN_INFO PFX "Detected Intel 460GX chipset\n"); - - pci_set_drvdata(pdev, bridge); - return agp_add_bridge(bridge); -} - -static void agp_intel_i460_remove(struct pci_dev *pdev) -{ - struct agp_bridge_data *bridge = pci_get_drvdata(pdev); - - agp_remove_bridge(bridge); - agp_put_bridge(bridge); -} - -static struct pci_device_id agp_intel_i460_pci_table[] = { - { - .class = (PCI_CLASS_BRIDGE_HOST << 8), - .class_mask = ~0, - .vendor = PCI_VENDOR_ID_INTEL, - .device = PCI_DEVICE_ID_INTEL_84460GX, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - }, - { } -}; - -MODULE_DEVICE_TABLE(pci, agp_intel_i460_pci_table); - -static struct pci_driver agp_intel_i460_pci_driver = { - .name = "agpgart-intel-i460", - .id_table = agp_intel_i460_pci_table, - .probe = agp_intel_i460_probe, - .remove = agp_intel_i460_remove, -}; - -static int __init agp_intel_i460_init(void) -{ - if (agp_off) - return -EINVAL; - return pci_register_driver(&agp_intel_i460_pci_driver); -} - -static void __exit agp_intel_i460_cleanup(void) -{ - pci_unregister_driver(&agp_intel_i460_pci_driver); -} - -module_init(agp_intel_i460_init); -module_exit(agp_intel_i460_cleanup); - -MODULE_AUTHOR("Chris Ahna "); -MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index ee71376f174b..3b2159416e62 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -64,25 +64,6 @@ static DEFINE_MUTEX(hpet_mutex); /* replaces BKL */ static u32 hpet_nhpet, hpet_max_freq = HPET_USER_FREQ; -/* This clocksource driver currently only works on ia64 */ -#ifdef CONFIG_IA64 -static void __iomem *hpet_mctr; - -static u64 read_hpet(struct clocksource *cs) -{ - return (u64)read_counter((void __iomem *)hpet_mctr); -} - -static struct clocksource clocksource_hpet = { - .name = "hpet", - .rating = 250, - .read = read_hpet, - .mask = CLOCKSOURCE_MASK(64), - .flags = 
CLOCK_SOURCE_IS_CONTINUOUS, -}; -static struct clocksource *hpet_clocksource; -#endif - /* A lock for concurrent access by app and isr hpet activity. */ static DEFINE_SPINLOCK(hpet_lock); @@ -907,17 +888,6 @@ int hpet_alloc(struct hpet_data *hdp) hpetp->hp_delta = hpet_calibrate(hpetp); -/* This clocksource driver currently only works on ia64 */ -#ifdef CONFIG_IA64 - if (!hpet_clocksource) { - hpet_mctr = (void __iomem *)&hpetp->hp_hpet->hpet_mc; - clocksource_hpet.archdata.fsys_mmio = hpet_mctr; - clocksource_register_hz(&clocksource_hpet, hpetp->hp_tick_freq); - hpetp->hp_clocksource = &clocksource_hpet; - hpet_clocksource = &clocksource_hpet; - } -#endif - return 0; } diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 8de74dcfa18c..442c40efb200 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -37,7 +37,7 @@ config HW_RANDOM_TIMERIOMEM config HW_RANDOM_INTEL tristate "Intel HW Random Number Generator support" - depends on (X86 || IA64 || COMPILE_TEST) && PCI + depends on (X86 || COMPILE_TEST) && PCI default HW_RANDOM help This driver provides kernel-side support for the Random Number diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 1052b0f2d4cf..8d27aa6b5b50 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -31,10 +31,6 @@ #include #include -#ifdef CONFIG_IA64 -# include -#endif - #define DEVMEM_MINOR 1 #define DEVPORT_MINOR 4 @@ -277,13 +273,6 @@ int __weak phys_mem_access_prot_allowed(struct file *file, #ifdef pgprot_noncached static int uncached_access(struct file *file, phys_addr_t addr) { -#if defined(CONFIG_IA64) - /* - * On ia64, we ignore O_DSYNC because we cannot tolerate memory - * attribute aliases. 
- */ - return !(efi_mem_attributes(addr) & EFI_MEMORY_WB); -#else /* * Accessing memory above the top the kernel knows about or through a * file pointer @@ -292,7 +281,6 @@ static int uncached_access(struct file *file, phys_addr_t addr) if (file->f_flags & O_DSYNC) return 1; return addr >= __pa(high_memory); -#endif } #endif diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c deleted file mode 100644 index b35f651837c8..000000000000 --- a/drivers/char/mspec.c +++ /dev/null @@ -1,295 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights - * reserved. - */ - -/* - * SN Platform Special Memory (mspec) Support - * - * This driver exports the SN special memory (mspec) facility to user - * processes. - * There are two types of memory made available thru this driver: - * uncached and cached. - * - * Uncached are used for memory write combining feature of the ia64 - * cpu. - * - * Cached are used for areas of memory that are used as cached addresses - * on our partition and used as uncached addresses from other partitions. - * Due to a design constraint of the SN2 Shub, you can not have processors - * on the same FSB perform both a cached and uncached reference to the - * same cache line. These special memory cached regions prevent the - * kernel from ever dropping in a TLB entry and therefore prevent the - * processor from ever speculating a cache line from this page. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#define CACHED_ID "Cached," -#define UNCACHED_ID "Uncached" -#define REVISION "4.0" -#define MSPEC_BASENAME "mspec" - -/* - * Page types allocated by the device. - */ -enum mspec_page_type { - MSPEC_CACHED = 2, - MSPEC_UNCACHED -}; - -/* - * One of these structures is allocated when an mspec region is mmaped. 
The - * structure is pointed to by the vma->vm_private_data field in the vma struct. - * This structure is used to record the addresses of the mspec pages. - * This structure is shared by all vma's that are split off from the - * original vma when split_vma()'s are done. - * - * The refcnt is incremented atomically because mm->mmap_lock does not - * protect in fork case where multiple tasks share the vma_data. - */ -struct vma_data { - refcount_t refcnt; /* Number of vmas sharing the data. */ - spinlock_t lock; /* Serialize access to this structure. */ - int count; /* Number of pages allocated. */ - enum mspec_page_type type; /* Type of pages allocated. */ - unsigned long vm_start; /* Original (unsplit) base. */ - unsigned long vm_end; /* Original (unsplit) end. */ - unsigned long maddr[]; /* Array of MSPEC addresses. */ -}; - -/* - * mspec_open - * - * Called when a device mapping is created by a means other than mmap - * (via fork, munmap, etc.). Increments the reference count on the - * underlying mspec data so it is not freed prematurely. - */ -static void -mspec_open(struct vm_area_struct *vma) -{ - struct vma_data *vdata; - - vdata = vma->vm_private_data; - refcount_inc(&vdata->refcnt); -} - -/* - * mspec_close - * - * Called when unmapping a device mapping. Frees all mspec pages - * belonging to all the vma's sharing this vma_data structure. - */ -static void -mspec_close(struct vm_area_struct *vma) -{ - struct vma_data *vdata; - int index, last_index; - unsigned long my_page; - - vdata = vma->vm_private_data; - - if (!refcount_dec_and_test(&vdata->refcnt)) - return; - - last_index = (vdata->vm_end - vdata->vm_start) >> PAGE_SHIFT; - for (index = 0; index < last_index; index++) { - if (vdata->maddr[index] == 0) - continue; - /* - * Clear the page before sticking it back - * into the pool. 
- */ - my_page = vdata->maddr[index]; - vdata->maddr[index] = 0; - memset((char *)my_page, 0, PAGE_SIZE); - uncached_free_page(my_page, 1); - } - - kvfree(vdata); -} - -/* - * mspec_fault - * - * Creates a mspec page and maps it to user space. - */ -static vm_fault_t -mspec_fault(struct vm_fault *vmf) -{ - unsigned long paddr, maddr; - unsigned long pfn; - pgoff_t index = vmf->pgoff; - struct vma_data *vdata = vmf->vma->vm_private_data; - - maddr = (volatile unsigned long) vdata->maddr[index]; - if (maddr == 0) { - maddr = uncached_alloc_page(numa_node_id(), 1); - if (maddr == 0) - return VM_FAULT_OOM; - - spin_lock(&vdata->lock); - if (vdata->maddr[index] == 0) { - vdata->count++; - vdata->maddr[index] = maddr; - } else { - uncached_free_page(maddr, 1); - maddr = vdata->maddr[index]; - } - spin_unlock(&vdata->lock); - } - - paddr = maddr & ~__IA64_UNCACHED_OFFSET; - pfn = paddr >> PAGE_SHIFT; - - return vmf_insert_pfn(vmf->vma, vmf->address, pfn); -} - -static const struct vm_operations_struct mspec_vm_ops = { - .open = mspec_open, - .close = mspec_close, - .fault = mspec_fault, -}; - -/* - * mspec_mmap - * - * Called when mmapping the device. Initializes the vma with a fault handler - * and private data structure necessary to allocate, track, and free the - * underlying pages. 
- */ -static int -mspec_mmap(struct file *file, struct vm_area_struct *vma, - enum mspec_page_type type) -{ - struct vma_data *vdata; - int pages, vdata_size; - - if (vma->vm_pgoff != 0) - return -EINVAL; - - if ((vma->vm_flags & VM_SHARED) == 0) - return -EINVAL; - - if ((vma->vm_flags & VM_WRITE) == 0) - return -EPERM; - - pages = vma_pages(vma); - vdata_size = sizeof(struct vma_data) + pages * sizeof(long); - vdata = kvzalloc(vdata_size, GFP_KERNEL); - if (!vdata) - return -ENOMEM; - - vdata->vm_start = vma->vm_start; - vdata->vm_end = vma->vm_end; - vdata->type = type; - spin_lock_init(&vdata->lock); - refcount_set(&vdata->refcnt, 1); - vma->vm_private_data = vdata; - - vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); - if (vdata->type == MSPEC_UNCACHED) - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_ops = &mspec_vm_ops; - - return 0; -} - -static int -cached_mmap(struct file *file, struct vm_area_struct *vma) -{ - return mspec_mmap(file, vma, MSPEC_CACHED); -} - -static int -uncached_mmap(struct file *file, struct vm_area_struct *vma) -{ - return mspec_mmap(file, vma, MSPEC_UNCACHED); -} - -static const struct file_operations cached_fops = { - .owner = THIS_MODULE, - .mmap = cached_mmap, - .llseek = noop_llseek, -}; - -static struct miscdevice cached_miscdev = { - .minor = MISC_DYNAMIC_MINOR, - .name = "mspec_cached", - .fops = &cached_fops -}; - -static const struct file_operations uncached_fops = { - .owner = THIS_MODULE, - .mmap = uncached_mmap, - .llseek = noop_llseek, -}; - -static struct miscdevice uncached_miscdev = { - .minor = MISC_DYNAMIC_MINOR, - .name = "mspec_uncached", - .fops = &uncached_fops -}; - -/* - * mspec_init - * - * Called at boot time to initialize the mspec facility. 
- */ -static int __init -mspec_init(void) -{ - int ret; - - ret = misc_register(&cached_miscdev); - if (ret) { - printk(KERN_ERR "%s: failed to register device %i\n", - CACHED_ID, ret); - return ret; - } - ret = misc_register(&uncached_miscdev); - if (ret) { - printk(KERN_ERR "%s: failed to register device %i\n", - UNCACHED_ID, ret); - misc_deregister(&cached_miscdev); - return ret; - } - - printk(KERN_INFO "%s %s initialized devices: %s %s\n", - MSPEC_BASENAME, REVISION, CACHED_ID, UNCACHED_ID); - - return 0; -} - -static void __exit -mspec_exit(void) -{ - misc_deregister(&uncached_miscdev); - misc_deregister(&cached_miscdev); -} - -module_init(mspec_init); -module_exit(mspec_exit); - -MODULE_AUTHOR("Silicon Graphics, Inc. "); -MODULE_DESCRIPTION("Driver for SGI SN special memory operations"); -MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index f429b9b37b76..35efb53d5492 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -239,17 +239,6 @@ if PPC32 || PPC64 source "drivers/cpufreq/Kconfig.powerpc" endif -if IA64 -config IA64_ACPI_CPUFREQ - tristate "ACPI Processor P-States driver" - depends on ACPI_PROCESSOR - help - This driver adds a CPUFreq driver which utilizes the ACPI - Processor Performance States. - - If in doubt, say N. 
-endif - if MIPS config BMIPS_CPUFREQ tristate "BMIPS CPUfreq Driver" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index ef8510774913..8d141c71b016 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -102,7 +102,6 @@ obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o ################################################################################## # Other platform drivers obj-$(CONFIG_BMIPS_CPUFREQ) += bmips-cpufreq.o -obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c deleted file mode 100644 index c6bdc455517f..000000000000 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ /dev/null @@ -1,353 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * This file provides the ACPI based P-state support. This - * module works with generic cpufreq infrastructure. 
Most of - * the code is based on i386 version - * (arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c) - * - * Copyright (C) 2005 Intel Corp - * Venkatesh Pallipadi - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Venkatesh Pallipadi"); -MODULE_DESCRIPTION("ACPI Processor P-States Driver"); -MODULE_LICENSE("GPL"); - -struct cpufreq_acpi_io { - struct acpi_processor_performance acpi_data; - unsigned int resume; -}; - -struct cpufreq_acpi_req { - unsigned int cpu; - unsigned int state; -}; - -static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; - -static struct cpufreq_driver acpi_cpufreq_driver; - - -static int -processor_set_pstate ( - u32 value) -{ - s64 retval; - - pr_debug("processor_set_pstate\n"); - - retval = ia64_pal_set_pstate((u64)value); - - if (retval) { - pr_debug("Failed to set freq to 0x%x, with error 0x%llx\n", - value, retval); - return -ENODEV; - } - return (int)retval; -} - - -static int -processor_get_pstate ( - u32 *value) -{ - u64 pstate_index = 0; - s64 retval; - - pr_debug("processor_get_pstate\n"); - - retval = ia64_pal_get_pstate(&pstate_index, - PAL_GET_PSTATE_TYPE_INSTANT); - *value = (u32) pstate_index; - - if (retval) - pr_debug("Failed to get current freq with " - "error 0x%llx, idx 0x%x\n", retval, *value); - - return (int)retval; -} - - -/* To be used only after data->acpi_data is initialized */ -static unsigned -extract_clock ( - struct cpufreq_acpi_io *data, - unsigned value) -{ - unsigned long i; - - pr_debug("extract_clock\n"); - - for (i = 0; i < data->acpi_data.state_count; i++) { - if (value == data->acpi_data.states[i].status) - return data->acpi_data.states[i].core_frequency; - } - return data->acpi_data.states[i-1].core_frequency; -} - - -static long -processor_get_freq ( - void *arg) -{ - struct cpufreq_acpi_req *req = arg; - unsigned int cpu = req->cpu; - struct cpufreq_acpi_io *data = 
acpi_io_data[cpu]; - u32 value; - int ret; - - pr_debug("processor_get_freq\n"); - if (smp_processor_id() != cpu) - return -EAGAIN; - - /* processor_get_pstate gets the instantaneous frequency */ - ret = processor_get_pstate(&value); - if (ret) { - pr_warn("get performance failed with error %d\n", ret); - return ret; - } - return 1000 * extract_clock(data, value); -} - - -static long -processor_set_freq ( - void *arg) -{ - struct cpufreq_acpi_req *req = arg; - unsigned int cpu = req->cpu; - struct cpufreq_acpi_io *data = acpi_io_data[cpu]; - int ret, state = req->state; - u32 value; - - pr_debug("processor_set_freq\n"); - if (smp_processor_id() != cpu) - return -EAGAIN; - - if (state == data->acpi_data.state) { - if (unlikely(data->resume)) { - pr_debug("Called after resume, resetting to P%d\n", state); - data->resume = 0; - } else { - pr_debug("Already at target state (P%d)\n", state); - return 0; - } - } - - pr_debug("Transitioning from P%d to P%d\n", - data->acpi_data.state, state); - - /* - * First we write the target state's 'control' value to the - * control_register. - */ - value = (u32) data->acpi_data.states[state].control; - - pr_debug("Transitioning to state: 0x%08x\n", value); - - ret = processor_set_pstate(value); - if (ret) { - pr_warn("Transition failed with error %d\n", ret); - return -ENODEV; - } - - data->acpi_data.state = state; - return 0; -} - - -static unsigned int -acpi_cpufreq_get ( - unsigned int cpu) -{ - struct cpufreq_acpi_req req; - long ret; - - req.cpu = cpu; - ret = work_on_cpu(cpu, processor_get_freq, &req); - - return ret > 0 ? 
(unsigned int) ret : 0; -} - - -static int -acpi_cpufreq_target ( - struct cpufreq_policy *policy, - unsigned int index) -{ - struct cpufreq_acpi_req req; - - req.cpu = policy->cpu; - req.state = index; - - return work_on_cpu(req.cpu, processor_set_freq, &req); -} - -static int -acpi_cpufreq_cpu_init ( - struct cpufreq_policy *policy) -{ - unsigned int i; - unsigned int cpu = policy->cpu; - struct cpufreq_acpi_io *data; - unsigned int result = 0; - struct cpufreq_frequency_table *freq_table; - - pr_debug("acpi_cpufreq_cpu_init\n"); - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return (-ENOMEM); - - acpi_io_data[cpu] = data; - - result = acpi_processor_register_performance(&data->acpi_data, cpu); - - if (result) - goto err_free; - - /* capability check */ - if (data->acpi_data.state_count <= 1) { - pr_debug("No P-States\n"); - result = -ENODEV; - goto err_unreg; - } - - if ((data->acpi_data.control_register.space_id != - ACPI_ADR_SPACE_FIXED_HARDWARE) || - (data->acpi_data.status_register.space_id != - ACPI_ADR_SPACE_FIXED_HARDWARE)) { - pr_debug("Unsupported address space [%d, %d]\n", - (u32) (data->acpi_data.control_register.space_id), - (u32) (data->acpi_data.status_register.space_id)); - result = -ENODEV; - goto err_unreg; - } - - /* alloc freq_table */ - freq_table = kcalloc(data->acpi_data.state_count + 1, - sizeof(*freq_table), - GFP_KERNEL); - if (!freq_table) { - result = -ENOMEM; - goto err_unreg; - } - - /* detect transition latency */ - policy->cpuinfo.transition_latency = 0; - for (i=0; iacpi_data.state_count; i++) { - if ((data->acpi_data.states[i].transition_latency * 1000) > - policy->cpuinfo.transition_latency) { - policy->cpuinfo.transition_latency = - data->acpi_data.states[i].transition_latency * 1000; - } - } - - /* table init */ - for (i = 0; i <= data->acpi_data.state_count; i++) - { - if (i < data->acpi_data.state_count) { - freq_table[i].frequency = - data->acpi_data.states[i].core_frequency * 1000; - } else { - 
freq_table[i].frequency = CPUFREQ_TABLE_END; - } - } - - policy->freq_table = freq_table; - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - pr_info("CPU%u - ACPI performance management activated\n", cpu); - - for (i = 0; i < data->acpi_data.state_count; i++) - pr_debug(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", - (i == data->acpi_data.state?'*':' '), i, - (u32) data->acpi_data.states[i].core_frequency, - (u32) data->acpi_data.states[i].power, - (u32) data->acpi_data.states[i].transition_latency, - (u32) data->acpi_data.states[i].bus_master_latency, - (u32) data->acpi_data.states[i].status, - (u32) data->acpi_data.states[i].control); - - /* the first call to ->target() should result in us actually - * writing something to the appropriate registers. */ - data->resume = 1; - - return (result); - - err_unreg: - acpi_processor_unregister_performance(cpu); - err_free: - kfree(data); - acpi_io_data[cpu] = NULL; - - return (result); -} - - -static int -acpi_cpufreq_cpu_exit ( - struct cpufreq_policy *policy) -{ - struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; - - pr_debug("acpi_cpufreq_cpu_exit\n"); - - if (data) { - acpi_io_data[policy->cpu] = NULL; - acpi_processor_unregister_performance(policy->cpu); - kfree(policy->freq_table); - kfree(data); - } - - return (0); -} - - -static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = cpufreq_generic_frequency_table_verify, - .target_index = acpi_cpufreq_target, - .get = acpi_cpufreq_get, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .name = "acpi-cpufreq", - .attr = cpufreq_generic_attr, -}; - - -static int __init -acpi_cpufreq_init (void) -{ - pr_debug("acpi_cpufreq_init\n"); - - return cpufreq_register_driver(&acpi_cpufreq_driver); -} - - -static void __exit -acpi_cpufreq_exit (void) -{ - pr_debug("acpi_cpufreq_exit\n"); - - cpufreq_unregister_driver(&acpi_cpufreq_driver); -} - -late_initcall(acpi_cpufreq_init); -module_exit(acpi_cpufreq_exit); 
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index b59e3041fd62..a79579fea6f0 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -77,30 +77,6 @@ config FIRMWARE_MEMMAP See also Documentation/ABI/testing/sysfs-firmware-memmap. -config EFI_PCDP - bool "Console device selection via EFI PCDP or HCDP table" - depends on ACPI && EFI && IA64 - default y if IA64 - help - If your firmware supplies the PCDP table, and you want to - automatically use the primary console device it describes - as the Linux console, say Y here. - - If your firmware supplies the HCDP table, and you want to - use the first serial port it describes as the Linux console, - say Y here. If your EFI ConOut path contains only a UART - device, it will become the console automatically. Otherwise, - you must specify the "console=hcdp" kernel boot argument. - - Neither the PCDP nor the HCDP affects naming of serial devices, - so a serial console may be /dev/ttyS0, /dev/ttyS1, etc, depending - on how the driver discovers devices. - - You must also enable the appropriate drivers (serial, VGA, etc.) 
- - See DIG64_HCDPv20_042804.pdf available from - - config DMIID bool "Export DMI identification via sysfs to userspace" depends on DMI diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 28fcddcd688f..1d1eb671d805 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_ARM_SDE_INTERFACE) += arm_sdei.o obj-$(CONFIG_DMI) += dmi_scan.o obj-$(CONFIG_DMI_SYSFS) += dmi-sysfs.o obj-$(CONFIG_EDD) += edd.o -obj-$(CONFIG_EFI_PCDP) += pcdp.o obj-$(CONFIG_DMIID) += dmi-id.o obj-$(CONFIG_INTEL_STRATIX10_SERVICE) += stratix10-svc.o obj-$(CONFIG_INTEL_STRATIX10_RSU) += stratix10-rsu.o diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 231f1c70d1db..cb374b2da9b7 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -4,7 +4,7 @@ menu "EFI (Extensible Firmware Interface) Support" config EFI_ESRT bool - depends on EFI && !IA64 + depends on EFI default y config EFI_VARS_PSTORE @@ -123,7 +123,7 @@ config EFI_BOOTLOADER_CONTROL config EFI_CAPSULE_LOADER tristate "EFI capsule loader" - depends on EFI && !IA64 + depends on EFI help This option exposes a loader interface "/dev/efi_capsule_loader" for users to load EFI capsules. 
This driver requires working runtime @@ -224,7 +224,7 @@ config EFI_DISABLE_PCI_DMA config EFI_EARLYCON def_bool y - depends on SERIAL_EARLYCON && !ARM && !IA64 + depends on SERIAL_EARLYCON && !ARM select FONT_SUPPORT select ARCH_USE_MEMREMAP_PROT diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 1599f1176842..47ca652f02f6 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -147,7 +147,7 @@ static ssize_t systab_show(struct kobject *kobj, if (efi.smbios != EFI_INVALID_TABLE_ADDR) str += sprintf(str, "SMBIOS=0x%lx\n", efi.smbios); - if (IS_ENABLED(CONFIG_IA64) || IS_ENABLED(CONFIG_X86)) + if (IS_ENABLED(CONFIG_X86)) str = efi_systab_show_arch(str); return str - buf; @@ -777,7 +777,6 @@ int __init efi_systab_check_header(const efi_table_hdr_t *systab_hdr) return 0; } -#ifndef CONFIG_IA64 static const efi_char16_t *__init map_fw_vendor(unsigned long fw_vendor, size_t size) { @@ -793,10 +792,6 @@ static void __init unmap_fw_vendor(const void *fw_vendor, size_t size) { early_memunmap((void *)fw_vendor, size); } -#else -#define map_fw_vendor(p, s) __va(p) -#define unmap_fw_vendor(v, s) -#endif void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr, unsigned long fw_vendor) @@ -899,11 +894,6 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, return buf; } -/* - * IA64 has a funky EFI memory map that doesn't work the same way as - * other architectures. 
- */ -#ifndef CONFIG_IA64 /* * efi_mem_attributes - lookup memmap attributes for physical address * @phys_addr: the physical address to lookup @@ -951,7 +941,6 @@ int efi_mem_type(unsigned long phys_addr) } return -EINVAL; } -#endif int efi_status_to_err(efi_status_t status) { diff --git a/drivers/firmware/pcdp.c b/drivers/firmware/pcdp.c deleted file mode 100644 index 715a45442d1c..000000000000 --- a/drivers/firmware/pcdp.c +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Parse the EFI PCDP table to locate the console device. - * - * (c) Copyright 2002, 2003, 2004 Hewlett-Packard Development Company, L.P. - * Khalid Aziz - * Alex Williamson - * Bjorn Helgaas - */ - -#include -#include -#include -#include -#include -#include -#include "pcdp.h" - -static int __init -setup_serial_console(struct pcdp_uart *uart) -{ -#ifdef CONFIG_SERIAL_8250_CONSOLE - int mmio; - static char options[64], *p = options; - char parity; - - mmio = (uart->addr.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY); - p += sprintf(p, "uart8250,%s,0x%llx", - mmio ? 
"mmio" : "io", uart->addr.address); - if (uart->baud) { - p += sprintf(p, ",%llu", uart->baud); - if (uart->bits) { - switch (uart->parity) { - case 0x2: parity = 'e'; break; - case 0x3: parity = 'o'; break; - default: parity = 'n'; - } - p += sprintf(p, "%c%d", parity, uart->bits); - } - } - - add_preferred_console("uart", 8250, &options[9]); - return setup_earlycon(options); -#else - return -ENODEV; -#endif -} - -static int __init -setup_vga_console(struct pcdp_device *dev) -{ -#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) - u8 *if_ptr; - - if_ptr = ((u8 *)dev + sizeof(struct pcdp_device)); - if (if_ptr[0] == PCDP_IF_PCI) { - struct pcdp_if_pci if_pci; - - /* struct copy since ifptr might not be correctly aligned */ - - memcpy(&if_pci, if_ptr, sizeof(if_pci)); - - if (if_pci.trans & PCDP_PCI_TRANS_IOPORT) - vga_console_iobase = if_pci.ioport_tra; - - if (if_pci.trans & PCDP_PCI_TRANS_MMIO) - vga_console_membase = if_pci.mmio_tra; - } - - if (efi_mem_type(vga_console_membase + 0xA0000) == EFI_CONVENTIONAL_MEMORY) { - printk(KERN_ERR "PCDP: VGA selected, but frame buffer is not MMIO!\n"); - return -ENODEV; - } - - conswitchp = &vga_con; - printk(KERN_INFO "PCDP: VGA console\n"); - return 0; -#else - return -ENODEV; -#endif -} - -extern unsigned long hcdp_phys; - -int __init -efi_setup_pcdp_console(char *cmdline) -{ - struct pcdp *pcdp; - struct pcdp_uart *uart; - struct pcdp_device *dev, *end; - int i, serial = 0; - int rc = -ENODEV; - - if (hcdp_phys == EFI_INVALID_TABLE_ADDR) - return -ENODEV; - - pcdp = early_memremap(hcdp_phys, 4096); - printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, hcdp_phys); - - if (strstr(cmdline, "console=hcdp")) { - if (pcdp->rev < 3) - serial = 1; - } else if (strstr(cmdline, "console=")) { - printk(KERN_INFO "Explicit \"console=\"; ignoring PCDP\n"); - goto out; - } - - if (pcdp->rev < 3 && efi_uart_console_only()) - serial = 1; - - for (i = 0, uart = pcdp->uart; i < pcdp->num_uarts; i++, uart++) { - if (uart->flags & 
PCDP_UART_PRIMARY_CONSOLE || serial) { - if (uart->type == PCDP_CONSOLE_UART) { - rc = setup_serial_console(uart); - goto out; - } - } - } - - end = (struct pcdp_device *) ((u8 *) pcdp + pcdp->length); - for (dev = (struct pcdp_device *) (pcdp->uart + pcdp->num_uarts); - dev < end; - dev = (struct pcdp_device *) ((u8 *) dev + dev->length)) { - if (dev->flags & PCDP_PRIMARY_CONSOLE) { - if (dev->type == PCDP_CONSOLE_VGA) { - rc = setup_vga_console(dev); - goto out; - } - } - } - -out: - early_memunmap(pcdp, 4096); - return rc; -} diff --git a/drivers/firmware/pcdp.h b/drivers/firmware/pcdp.h deleted file mode 100644 index e02540571c52..000000000000 --- a/drivers/firmware/pcdp.h +++ /dev/null @@ -1,108 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Definitions for PCDP-defined console devices - * - * For DIG64_HCDPv10a_01.pdf and DIG64_PCDPv20.pdf (v1.0a and v2.0 resp.), - * please see - * - * (c) Copyright 2002, 2004 Hewlett-Packard Development Company, L.P. - * Khalid Aziz - * Bjorn Helgaas - */ - -#define PCDP_CONSOLE 0 -#define PCDP_DEBUG 1 -#define PCDP_CONSOLE_OUTPUT 2 -#define PCDP_CONSOLE_INPUT 3 - -#define PCDP_UART (0 << 3) -#define PCDP_VGA (1 << 3) -#define PCDP_USB (2 << 3) - -/* pcdp_uart.type and pcdp_device.type */ -#define PCDP_CONSOLE_UART (PCDP_UART | PCDP_CONSOLE) -#define PCDP_DEBUG_UART (PCDP_UART | PCDP_DEBUG) -#define PCDP_CONSOLE_VGA (PCDP_VGA | PCDP_CONSOLE_OUTPUT) -#define PCDP_CONSOLE_USB (PCDP_USB | PCDP_CONSOLE_INPUT) - -/* pcdp_uart.flags */ -#define PCDP_UART_EDGE_SENSITIVE (1 << 0) -#define PCDP_UART_ACTIVE_LOW (1 << 1) -#define PCDP_UART_PRIMARY_CONSOLE (1 << 2) -#define PCDP_UART_IRQ (1 << 6) /* in pci_func for rev < 3 */ -#define PCDP_UART_PCI (1 << 7) /* in pci_func for rev < 3 */ - -struct pcdp_uart { - u8 type; - u8 bits; - u8 parity; - u8 stop_bits; - u8 pci_seg; - u8 pci_bus; - u8 pci_dev; - u8 pci_func; - u64 baud; - struct acpi_generic_address addr; - u16 pci_dev_id; - u16 pci_vendor_id; - u32 gsi; - u32 
clock_rate; - u8 pci_prog_intfc; - u8 flags; - u16 conout_index; - u32 reserved; -} __attribute__((packed)); - -#define PCDP_IF_PCI 1 - -/* pcdp_if_pci.trans */ -#define PCDP_PCI_TRANS_IOPORT 0x02 -#define PCDP_PCI_TRANS_MMIO 0x01 - -struct pcdp_if_pci { - u8 interconnect; - u8 reserved; - u16 length; - u8 segment; - u8 bus; - u8 dev; - u8 fun; - u16 dev_id; - u16 vendor_id; - u32 acpi_interrupt; - u64 mmio_tra; - u64 ioport_tra; - u8 flags; - u8 trans; -} __attribute__((packed)); - -struct pcdp_vga { - u8 count; /* address space descriptors */ -} __attribute__((packed)); - -/* pcdp_device.flags */ -#define PCDP_PRIMARY_CONSOLE 1 - -struct pcdp_device { - u8 type; - u8 flags; - u16 length; - u16 efi_index; - /* next data is pcdp_if_pci or pcdp_if_acpi (not yet supported) */ - /* next data is device specific type (currently only pcdp_vga) */ -} __attribute__((packed)); - -struct pcdp { - u8 signature[4]; - u32 length; - u8 rev; /* PCDP v2.0 is rev 3 */ - u8 chksum; - u8 oemid[6]; - u8 oem_tabid[8]; - u32 oem_rev; - u8 creator_id[4]; - u32 creator_rev; - u32 num_uarts; - struct pcdp_uart uart[]; /* actual size is num_uarts */ - /* remainder of table is pcdp_device structures */ -} __attribute__((packed)); diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index 49a743f62b4a..025dc558c94e 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -945,11 +945,11 @@ static struct { DRM_IOCTL32_DEF(DRM_IOCTL_SG_ALLOC, compat_drm_sg_alloc), DRM_IOCTL32_DEF(DRM_IOCTL_SG_FREE, compat_drm_sg_free), #endif -#if defined(CONFIG_X86) || defined(CONFIG_IA64) +#if defined(CONFIG_X86) DRM_IOCTL32_DEF(DRM_IOCTL_UPDATE_DRAW, compat_drm_update_draw), #endif DRM_IOCTL32_DEF(DRM_IOCTL_WAIT_VBLANK, compat_drm_wait_vblank), -#if defined(CONFIG_X86) || defined(CONFIG_IA64) +#if defined(CONFIG_X86) DRM_IOCTL32_DEF(DRM_IOCTL_MODE_ADDFB2, compat_drm_mode_addfb2), #endif }; diff --git a/drivers/input/serio/i8042.h b/drivers/input/serio/i8042.h index 
adb5173372d3..5f61672d55b7 100644 --- a/drivers/input/serio/i8042.h +++ b/drivers/input/serio/i8042.h @@ -19,7 +19,7 @@ #include "i8042-snirm.h" #elif defined(CONFIG_SPARC) #include "i8042-sparcio.h" -#elif defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) +#elif defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) #include "i8042-acpipnpio.h" #else #include "i8042-io.h" diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 2b12b583ef4b..7f04491ca5f0 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -91,7 +91,7 @@ config IOMMU_DEBUGFS choice prompt "IOMMU default domain type" depends on IOMMU_API - default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64 + default IOMMU_DEFAULT_DMA_LAZY if X86 default IOMMU_DEFAULT_DMA_STRICT help Choose the type of IOMMU domain used to manage DMA API usage by @@ -146,7 +146,7 @@ config OF_IOMMU # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA - def_bool ARM64 || IA64 || X86 + def_bool ARM64 || X86 select DMA_OPS select IOMMU_API select IOMMU_IOVA diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 2e56bd79f589..119d2c57a48e 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -11,7 +11,7 @@ config DMAR_DEBUG config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" - depends on PCI_MSI && ACPI && (X86 || IA64) + depends on PCI_MSI && ACPI && X86 select DMA_OPS select IOMMU_API select IOMMU_IOVA diff --git a/drivers/media/cec/platform/Kconfig b/drivers/media/cec/platform/Kconfig index b672d3142eb7..ede81fe331b0 100644 --- a/drivers/media/cec/platform/Kconfig +++ b/drivers/media/cec/platform/Kconfig @@ -99,7 +99,7 @@ config CEC_TEGRA config CEC_SECO tristate "SECO Boards HDMI CEC driver" - depends on (X86 || IA64) || COMPILE_TEST + depends on X86 || COMPILE_TEST depends on PCI && DMI select CEC_CORE select CEC_NOTIFIER diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index cadd4a820c03..f37c4b8380ae 100644 --- 
a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -166,7 +166,7 @@ config ENCLOSURE_SERVICES config SGI_XP tristate "Support communication between SGI SSIs" depends on NET - depends on (IA64_SGI_UV || X86_UV) && SMP + depends on X86_UV && SMP depends on X86_64 || BROKEN select SGI_GRU if X86_64 && SMP help diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h index 3ad76cd18b4b..6ae045037219 100644 --- a/drivers/misc/sgi-gru/gru.h +++ b/drivers/misc/sgi-gru/gru.h @@ -30,9 +30,7 @@ /* * Size used to map GRU GSeg */ -#if defined(CONFIG_IA64) -#define GRU_GSEG_PAGESIZE (256 * 1024UL) -#elif defined(CONFIG_X86_64) +#if defined(CONFIG_X86_64) #define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */ #else #error "Unsupported architecture" diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index 04d5170ac149..da5eb9edf9ec 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -29,17 +29,7 @@ extern void gru_wait_abort_proc(void *cb); * Architecture dependent functions */ -#if defined(CONFIG_IA64) -#include -#include -#define __flush_cache(p) ia64_fc((unsigned long)p) -/* Use volatile on IA64 to ensure ordering via st4.rel */ -#define gru_ordered_store_ulong(p, v) \ - do { \ - barrier(); \ - *((volatile unsigned long *)(p)) = v; /* force st.rel */ \ - } while (0) -#elif defined(CONFIG_X86_64) +#if defined(CONFIG_X86_64) #include #define __flush_cache(p) clflush(p) #define gru_ordered_store_ulong(p, v) \ diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index a3d659c11cc4..e755690c9805 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -337,72 +337,6 @@ static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) return mmr; } -#ifdef CONFIG_IA64 - -static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; - -static void gru_noop(struct irq_data *d) -{ -} - -static struct irq_chip 
gru_chip[GRU_CHIPLETS_PER_BLADE] = { - [0 ... GRU_CHIPLETS_PER_BLADE - 1] { - .irq_mask = gru_noop, - .irq_unmask = gru_noop, - .irq_ack = gru_noop - } -}; - -static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, - irq_handler_t irq_handler, int cpu, int blade) -{ - unsigned long mmr; - int irq = IRQ_GRU + chiplet; - int ret, core; - - mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); - if (mmr == 0) - return 0; - - if (gru_irq_count[chiplet] == 0) { - gru_chip[chiplet].name = irq_name; - ret = irq_set_chip(irq, &gru_chip[chiplet]); - if (ret) { - printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n", - GRU_DRIVER_ID_STR, -ret); - return ret; - } - - ret = request_irq(irq, irq_handler, 0, irq_name, NULL); - if (ret) { - printk(KERN_ERR "%s: request_irq failed, errno=%d\n", - GRU_DRIVER_ID_STR, -ret); - return ret; - } - } - gru_irq_count[chiplet]++; - - return 0; -} - -static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) -{ - unsigned long mmr; - int core, irq = IRQ_GRU + chiplet; - - if (gru_irq_count[chiplet] == 0) - return; - - mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); - if (mmr == 0) - return; - - if (--gru_irq_count[chiplet] == 0) - free_irq(irq, NULL); -} - -#elif defined CONFIG_X86_64 - static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, irq_handler_t irq_handler, int cpu, int blade) { @@ -447,8 +381,6 @@ static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) } } -#endif - static void gru_teardown_tlb_irqs(void) { int blade; @@ -514,12 +446,8 @@ static int __init gru_init(void) if (!gru_supported()) return 0; -#if defined CONFIG_IA64 - gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */ -#else gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG) & 0x7fffffffffffUL; -#endif gru_start_vaddr = __va(gru_start_paddr); gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", diff --git 
a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c index 1d75d5e540bc..695316a83b01 100644 --- a/drivers/misc/sgi-gru/gruhandles.c +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -11,16 +11,10 @@ #include "grutables.h" /* 10 sec */ -#ifdef CONFIG_IA64 -#include -#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) -#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) -#else #include #include #define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) #define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) -#endif /* Extract the status field from a kernel handle */ #define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3) diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 4eb4b9455139..0f5b09e290c8 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -41,16 +41,12 @@ struct device *grudev = &gru_device; */ int gru_cpu_fault_map_id(void) { -#ifdef CONFIG_IA64 - return uv_blade_processor_id() % GRU_NUM_TFM; -#else int cpu = smp_processor_id(); int id, core; core = uv_cpu_core_number(cpu); id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); return id; -#endif } /*--------- ASID Management ------------------------------------------- diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index f1336f43d3bd..3185711beb07 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -16,7 +16,7 @@ #include -#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV +#if defined CONFIG_X86_UV #include #endif diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c index 19fc7076af27..3faa7eadf679 100644 --- a/drivers/misc/sgi-xp/xp_uv.c +++ b/drivers/misc/sgi-xp/xp_uv.c @@ -18,8 +18,6 @@ #include #if defined CONFIG_X86_64 #include -#elif defined CONFIG_IA64_SGI_UV -#include #endif #include "../sgi-gru/grukservices.h" #include "xp.h" @@ -99,17 +97,6 @@ xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size) 
"UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret); return xpBiosError; } - -#elif defined CONFIG_IA64_SGI_UV - u64 nasid_array; - - ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, - &nasid_array); - if (ret != 0) { - dev_err(xp, "sn_change_memprotect(,, " - "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); - return xpSalError; - } #else #error not a supported configuration #endif @@ -129,17 +116,6 @@ xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size) "UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret); return xpBiosError; } - -#elif defined CONFIG_IA64_SGI_UV - u64 nasid_array; - - ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, - &nasid_array); - if (ret != 0) { - dev_err(xp, "sn_change_memprotect(,, " - "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); - return xpSalError; - } #else #error not a supported configuration #endif diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 6da509d692bb..cc71395782b6 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -1155,36 +1155,6 @@ xpc_die_deactivate(void) static int xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) { -#ifdef CONFIG_IA64 /* !!! temporary kludge */ - switch (event) { - case DIE_MACHINE_RESTART: - case DIE_MACHINE_HALT: - xpc_die_deactivate(); - break; - - case DIE_KDEBUG_ENTER: - /* Should lack of heartbeat be ignored by other partitions? */ - if (!xpc_kdebug_ignore) - break; - - fallthrough; - case DIE_MCA_MONARCH_ENTER: - case DIE_INIT_MONARCH_ENTER: - xpc_arch_ops.offline_heartbeat(); - break; - - case DIE_KDEBUG_LEAVE: - /* Is lack of heartbeat being ignored by other partitions? 
*/ - if (!xpc_kdebug_ignore) - break; - - fallthrough; - case DIE_MCA_MONARCH_LEAVE: - case DIE_INIT_MONARCH_LEAVE: - xpc_arch_ops.online_heartbeat(); - break; - } -#else struct die_args *die_args = _die_args; switch (event) { @@ -1206,7 +1176,6 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) default: xpc_die_deactivate(); } -#endif return NOTIFY_DONE; } diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index fff522d347e3..2f03a7080d96 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -24,34 +24,12 @@ #include #include #include -#if defined CONFIG_X86_64 #include #include -#elif defined CONFIG_IA64_SGI_UV -#include -#include -#endif #include "../sgi-gru/gru.h" #include "../sgi-gru/grukservices.h" #include "xpc.h" -#if defined CONFIG_IA64_SGI_UV -struct uv_IO_APIC_route_entry { - __u64 vector : 8, - delivery_mode : 3, - dest_mode : 1, - delivery_status : 1, - polarity : 1, - __reserved_1 : 1, - trigger : 1, - mask : 1, - __reserved_2 : 15, - dest : 32; -}; - -#define sn_partition_id 0 -#endif - static struct xpc_heartbeat_uv *xpc_heartbeat_uv; #define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) @@ -113,7 +91,6 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) { int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); -#if defined CONFIG_X86_64 mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, UV_AFFINITY_CPU); if (mq->irq < 0) @@ -121,40 +98,13 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); -#elif defined CONFIG_IA64_SGI_UV - if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0) - mq->irq = SGI_XPC_ACTIVATE; - else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0) - mq->irq = SGI_XPC_NOTIFY; - else - return -EINVAL; - - mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq; - uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, 
mq->mmr_value); -#else - #error not a supported configuration -#endif - return 0; } static void xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) { -#if defined CONFIG_X86_64 uv_teardown_irq(mq->irq); - -#elif defined CONFIG_IA64_SGI_UV - int mmr_pnode; - unsigned long mmr_value; - - mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); - mmr_value = 1UL << 16; - - uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value); -#else - #error not a supported configuration -#endif } static int @@ -162,17 +112,6 @@ xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) { int ret; -#if defined CONFIG_IA64_SGI_UV - int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); - - ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address), - mq->order, &mq->mmr_offset); - if (ret < 0) { - dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n", - ret); - return -EBUSY; - } -#elif defined CONFIG_X86_64 ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address), mq->order, &mq->mmr_offset); if (ret < 0) { @@ -180,9 +119,6 @@ xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) "ret=%d\n", ret); return ret; } -#else - #error not a supported configuration -#endif mq->watchlist_num = ret; return 0; @@ -194,15 +130,8 @@ xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq) int ret; int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); -#if defined CONFIG_X86_64 ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num); BUG_ON(ret != BIOS_STATUS_SUCCESS); -#elif defined CONFIG_IA64_SGI_UV - ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num); - BUG_ON(ret != SALRET_OK); -#else - #error not a supported configuration -#endif } static struct xpc_gru_mq_uv * @@ -786,7 +715,6 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, s64 status; enum xp_retval ret; -#if defined CONFIG_X86_64 status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa, (u64 *)len); if (status == BIOS_STATUS_SUCCESS) @@ -796,19 +724,6 @@ 
xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, else ret = xpBiosError; -#elif defined CONFIG_IA64_SGI_UV - status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len); - if (status == SALRET_OK) - ret = xpSuccess; - else if (status == SALRET_MORE_PASSES) - ret = xpNeedMoreInfo; - else - ret = xpSalError; - -#else - #error not a supported configuration -#endif - return ret; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 14b311196b8f..c7c1ff38ea33 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17005,7 +17005,7 @@ static u32 tg3_calc_dma_bndry(struct tg3 *tp, u32 val) !tg3_flag(tp, PCI_EXPRESS)) goto out; -#if defined(CONFIG_PPC64) || defined(CONFIG_IA64) || defined(CONFIG_PARISC) +#if defined(CONFIG_PPC64) || defined(CONFIG_PARISC) goal = BOUNDARY_MULTI_CACHELINE; #else #if defined(CONFIG_SPARC64) || defined(CONFIG_ALPHA) diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h index 627a93ce38ab..10b1e534030e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.h +++ b/drivers/net/ethernet/brocade/bna/bnad.h @@ -19,7 +19,6 @@ #include #include -/* Fix for IA64 */ #include #include diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 1d1e183d3a8b..ed24d6af7487 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -233,9 +233,7 @@ static int nx_set_dma_mask(struct netxen_adapter *adapter) cmask = DMA_BIT_MASK(32); if (NX_IS_REVISION_P2(adapter->ahw.revision_id)) { -#ifndef CONFIG_IA64 mask = DMA_BIT_MASK(35); -#endif } else { mask = DMA_BIT_MASK(39); cmask = mask; diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index 5e6b1eb54c64..a771b2259f21 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -556,7 +556,7 @@ 
EXPORT_SYMBOL(vga_put); static bool vga_is_firmware_default(struct pci_dev *pdev) { -#if defined(CONFIG_X86) || defined(CONFIG_IA64) +#if defined(CONFIG_X86) u64 base = screen_info.lfb_base; u64 size = screen_info.lfb_size; struct resource *r; diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index ee17cf5c44c6..c12978311a09 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -222,7 +222,7 @@ config SERIAL_8250_EXTENDED config SERIAL_8250_MANY_PORTS bool "Support more than 4 legacy serial ports" - depends on SERIAL_8250_EXTENDED && !IA64 + depends on SERIAL_8250_EXTENDED help Say Y here if you have dumb serial boards other than the four standard COM 1/2/3/4 ports. This may happen if you have an AST diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 358f216c6cd6..1fe6107b539b 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -1273,7 +1273,7 @@ static void kbd_bh(struct tasklet_struct *unused) } } -#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_ALPHA) ||\ +#if defined(CONFIG_X86) || defined(CONFIG_ALPHA) ||\ defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) ||\ defined(CONFIG_PARISC) || defined(CONFIG_SUPERH) ||\ (defined(CONFIG_ARM) && defined(CONFIG_KEYBOARD_ATKBD) && !defined(CONFIG_ARCH_RPC)) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index eac0ba39581e..747053f529a5 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -463,7 +463,7 @@ config FB_VESA config FB_EFI bool "EFI-based Framebuffer Support" - depends on (FB = y) && !IA64 && EFI + depends on (FB = y) && EFI select APERTURE_HELPERS select DRM_PANEL_ORIENTATION_QUIRKS select FB_IOMEM_HELPERS diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 751458959411..8cb6fa45d599 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1287,7 +1287,7 @@ config INTEL_MID_WATCHDOG config 
ITCO_WDT tristate "Intel TCO Timer/Watchdog" - depends on (X86 || IA64) && PCI + depends on X86 && PCI select WATCHDOG_CORE depends on I2C || I2C=n depends on MFD_INTEL_PMC_BXT || !MFD_INTEL_PMC_BXT diff --git a/fs/Kconfig b/fs/Kconfig index aa7e03cc1941..421f68fed1ba 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -255,7 +255,7 @@ config ARCH_SUPPORTS_HUGETLBFS config HUGETLBFS bool "HugeTLB file system support" - depends on X86 || IA64 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN + depends on X86 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN depends on (SYSFS || SYSCTL) select MEMFD_CREATE help diff --git a/fs/afs/main.c b/fs/afs/main.c index eae288c8d40a..6425c81d07de 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -41,8 +41,6 @@ const char afs_init_sysname[] = "arm_linux26"; const char afs_init_sysname[] = "aarch64_linux26"; #elif defined(CONFIG_X86_32) const char afs_init_sysname[] = "i386_linux26"; -#elif defined(CONFIG_IA64) -const char afs_init_sysname[] = "ia64_linux26"; #elif defined(CONFIG_PPC64) const char afs_init_sysname[] = "ppc64_linux26"; #elif defined(CONFIG_PPC32) diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index c14852362fce..052d0e888c27 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -22,7 +22,7 @@ /* * On intel, even if sizes match, alignment and/or padding may differ. 
*/ -#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) +#if defined(CONFIG_X86_64) #define BROKEN_X86_ALIGNMENT #define __compat_packed __attribute__((packed)) #else diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a73246c3c35e..9bcf5641a7cf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -259,7 +259,7 @@ void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); -#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) +#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void @@ -1114,15 +1114,8 @@ static inline int acpi_get_lps0_constraint(struct device *dev) return ACPI_STATE_UNKNOWN; } #endif /* CONFIG_SUSPEND && CONFIG_X86 */ -#ifndef CONFIG_IA64 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else -static inline void arch_reserve_mem_area(acpi_physical_address addr, - size_t size) -{ -} -#endif /* CONFIG_X86 */ -#else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif diff --git a/include/linux/efi.h b/include/linux/efi.h index 80b21d1c6eaf..9cc5bf32f6f2 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -358,13 +358,10 @@ void efi_native_runtime_setup(void); * where the UEFI SPEC breaks the line. 
*/ #define NULL_GUID EFI_GUID(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) -#define MPS_TABLE_GUID EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) -#define SAL_SYSTEM_TABLE_GUID EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) -#define HCDP_TABLE_GUID EFI_GUID(0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98) #define UGA_IO_PROTOCOL_GUID EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) @@ -851,10 +848,6 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len) return 1; } -#ifdef CONFIG_EFI_PCDP -extern int __init efi_setup_pcdp_console(char *); -#endif - /* * We play games with efi_enabled so that the compiler will, if * possible, remove EFI-related code altogether. 
diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..17d530e12f70 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -362,8 +362,6 @@ extern unsigned int kobjsize(const void *objp); # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 -#elif defined(CONFIG_IA64) -# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_SPARC64) # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ # define VM_ARCH_CLEAR VM_SPARC_ADI diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 962cd41a2cb5..99e4f1f718c7 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -276,7 +276,7 @@ struct kparam_array read-only sections (which is part of respective UNIX ABI on these platforms). So 'const' makes no sense and even causes compile failures with some compilers. */ -#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64) +#if defined(CONFIG_ALPHA) || defined(CONFIG_PPC64) #define __moduleparam_const #else #define __moduleparam_const const diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 1478b9dd05fa..d801409b33cf 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -144,7 +144,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } #elif defined(CONFIG_PPC) #define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } -#elif defined(CONFIG_PARISC) || defined(CONFIG_IA64) +#elif defined(CONFIG_PARISC) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } #elif !defined(CONFIG_MMU) #define __VM_ARCH_SPECIFIC_1 {VM_MAPPED_COPY,"mappedcopy" } diff --git a/init/Kconfig b/init/Kconfig index 6d35728b94b2..9ffb103fc927 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1723,7 +1723,7 @@ config KALLSYMS_ABSOLUTE_PERCPU config KALLSYMS_BASE_RELATIVE bool depends on KALLSYMS - default !IA64 + default y help Instead of emitting them as absolute values in the 
native word size, emit the symbol references in the kallsyms table as 32-bit entries, diff --git a/kernel/cpu.c b/kernel/cpu.c index 6de7c6bb74ee..234361530007 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1725,9 +1725,6 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target) if (!cpu_possible(cpu)) { pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", cpu); -#if defined(CONFIG_IA64) - pr_err("please check additional_cpus= boot parameter\n"); -#endif return -EINVAL; } diff --git a/kernel/fork.c b/kernel/fork.c index 3b6d20dfb9a8..faa921ecbe67 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3144,7 +3144,7 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) if (!access_ok((void __user *)kargs->stack, kargs->stack_size)) return false; -#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64) +#if !defined(CONFIG_STACK_GROWSUP) kargs->stack += kargs->stack_size; #endif } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2299a5cfbfb9..caab7cd26790 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10289,9 +10289,9 @@ void normalize_rt_tasks(void) #endif /* CONFIG_MAGIC_SYSRQ */ -#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) +#if defined(CONFIG_KGDB_KDB) /* - * These functions are only useful for the IA64 MCA handling, or kdb. + * These functions are only useful for kdb. * * They can only be called when the whole system has been * stopped - every CPU needs to be quiescent, and no scheduling @@ -10313,30 +10313,7 @@ struct task_struct *curr_task(int cpu) return cpu_curr(cpu); } -#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ - -#ifdef CONFIG_IA64 -/** - * ia64_set_curr_task - set the current task for a given CPU. - * @cpu: the processor in question. - * @p: the task pointer to set. - * - * Description: This function must only be used when non-maskable interrupts - * are serviced on a separate stack. 
It allows the architecture to switch the - * notion of the current task on a CPU in a non-blocking manner. This function - * must be called with all CPU's synchronized, and interrupts disabled, the - * and caller must save the original value of the current task (see - * curr_task() above) and restore that value before reenabling interrupts and - * re-starting the system. - * - * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! - */ -void ia64_set_curr_task(int cpu, struct task_struct *p) -{ - cpu_curr(cpu) = p; -} - -#endif +#endif /* defined(CONFIG_KGDB_KDB) */ #ifdef CONFIG_CGROUP_SCHED /* task_group_lock serializes the addition/removal of task groups */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 354a2d294f52..dca7a8f735cd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1939,15 +1939,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef CONFIG_IA64 - { - .procname = "unaligned-dump-stack", - .data = &unaligned_dump_stack, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif #ifdef CONFIG_RT_MUTEXES { .procname = "max_lock_depth", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index fa307f93fa2e..2caf73c9f623 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -759,7 +759,7 @@ config SHRINKER_DEBUG config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL && !IA64 + depends on DEBUG_KERNEL help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. 
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 353268b9f129..842894158944 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -133,9 +133,6 @@ #ifdef CONFIG_ARM # define XZ_DEC_ARM #endif -#ifdef CONFIG_IA64 -# define XZ_DEC_IA64 -#endif #ifdef CONFIG_SPARC # define XZ_DEC_SPARC #endif diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index adce22ac18d6..aef086a6bf2f 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -19,11 +19,6 @@ config XZ_DEC_POWERPC default y select XZ_DEC_BCJ -config XZ_DEC_IA64 - bool "IA-64 BCJ filter decoder" if EXPERT - default y - select XZ_DEC_BCJ - config XZ_DEC_ARM bool "ARM BCJ filter decoder" if EXPERT default y diff --git a/mm/mmap.c b/mm/mmap.c index b56a7f0c9f85..4b0eaf6427c4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1924,9 +1924,9 @@ static int acct_stack_growth(struct vm_area_struct *vma, return 0; } -#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) +#if defined(CONFIG_STACK_GROWSUP) /* - * PA-RISC uses this for its stack; IA64 for its Register Backing Store. + * PA-RISC uses this for its stack. * vma is the last one with address > vma->vm_end. Have to extend vma. */ static int expand_upwards(struct vm_area_struct *vma, unsigned long address) @@ -2023,7 +2023,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address) validate_mm(mm); return error; } -#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */ +#endif /* CONFIG_STACK_GROWSUP */ /* * vma is the first one with address < vma->vm_start. Have to extend vma. diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h deleted file mode 100644 index 6fffe5682713..000000000000 --- a/tools/arch/ia64/include/asm/barrier.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copied from the kernel sources to tools/: - * - * Memory barrier definitions. 
This is based on information published - * in the Processor Abstraction Layer and the System Abstraction Layer - * manual. - * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - */ -#ifndef _TOOLS_LINUX_ASM_IA64_BARRIER_H -#define _TOOLS_LINUX_ASM_IA64_BARRIER_H - -#include - -/* - * Macros to force memory ordering. In these descriptions, "previous" - * and "subsequent" refer to program order; "visible" means that all - * architecturally visible effects of a memory access have occurred - * (at a minimum, this means the memory has been read or written). - * - * wmb(): Guarantees that all preceding stores to memory- - * like regions are visible before any subsequent - * stores and that all following stores will be - * visible only after all previous stores. - * rmb(): Like wmb(), but for reads. - * mb(): wmb()/rmb() combo, i.e., all previous memory - * accesses are visible before all subsequent - * accesses and vice versa. This is also known as - * a "fence." - * - * Note: "mb()" and its variants cannot be used as a fence to order - * accesses to memory mapped I/O registers. For that, mf.a needs to - * be used. However, we don't want to always use mf.a because (a) - * it's (presumably) much slower than mf and (b) mf.a is supported for - * sequential memory pages only. 
- */ - -#define mb() ia64_mf() -#define rmb() mb() -#define wmb() mb() - -#define smp_store_release(p, v) \ -do { \ - barrier(); \ - WRITE_ONCE(*p, v); \ -} while (0) - -#define smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = READ_ONCE(*p); \ - barrier(); \ - ___p1; \ -}) - -#endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */ diff --git a/tools/arch/ia64/include/uapi/asm/bitsperlong.h b/tools/arch/ia64/include/uapi/asm/bitsperlong.h deleted file mode 100644 index 1146d55563db..000000000000 --- a/tools/arch/ia64/include/uapi/asm/bitsperlong.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __ASM_IA64_BITSPERLONG_H -#define __ASM_IA64_BITSPERLONG_H - -#define __BITS_PER_LONG 64 - -#include - -#endif /* __ASM_IA64_BITSPERLONG_H */ diff --git a/tools/arch/ia64/include/uapi/asm/mman.h b/tools/arch/ia64/include/uapi/asm/mman.h deleted file mode 100644 index 2a19bb1db4ab..000000000000 --- a/tools/arch/ia64/include/uapi/asm/mman.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H -#define TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H -#include -/* MAP_32BIT is undefined on ia64, fix it for perf */ -#define MAP_32BIT 0 -#endif diff --git a/usr/include/Makefile b/usr/include/Makefile index 07796df0a295..338c81f1fcf3 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -59,12 +59,6 @@ ifeq ($(SRCARCH),arc) no-header-test += linux/bpf_perf_event.h endif -ifeq ($(SRCARCH),ia64) -no-header-test += asm/setup.h -no-header-test += asm/sigcontext.h -no-header-test += linux/if_bonding.h -endif - ifeq ($(SRCARCH),powerpc) no-header-test += linux/bpf_perf_event.h endif -- cgit v1.2.3 From f5e836884d8e55b416dfad55c29481ec1b65c1f0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 13 Jan 2023 17:57:47 +0100 Subject: kernel: Drop IA64 support from sig_fault handlers Signed-off-by: Ard Biesheuvel --- include/linux/sched/signal.h | 17 
++++------------- include/uapi/asm-generic/siginfo.h | 5 ----- kernel/signal.c | 25 +++++-------------------- 3 files changed, 9 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0014d3adaf84..155332977239 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -303,20 +303,11 @@ static inline void kernel_signal_stop(void) schedule(); } -#ifdef __ia64__ -# define ___ARCH_SI_IA64(_a1, _a2, _a3) , _a1, _a2, _a3 -#else -# define ___ARCH_SI_IA64(_a1, _a2, _a3) -#endif -int force_sig_fault_to_task(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); -int force_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)); -int send_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t); +int force_sig_fault_to_task(int sig, int code, void __user *addr, + struct task_struct *t); +int force_sig_fault(int sig, int code, void __user *addr); +int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 0f52d0ac47c5..b7bc545ec3b2 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -68,11 +68,6 @@ union __sifields { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { void __user *_addr; /* faulting insn/memory ref. */ -#ifdef __ia64__ - int _imm; /* immediate value for "break" */ - unsigned int _flags; /* see ia64 si_flags */ - unsigned long _isr; /* isr */ -#endif #define __ADDR_BND_PKEY_PAD (__alignof__(void *) < sizeof(short) ? 
\ sizeof(short) : __alignof__(void *)) diff --git a/kernel/signal.c b/kernel/signal.c index 09019017d669..26d9f66e5364 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1718,9 +1718,8 @@ void force_sigsegv(int sig) force_sig(SIGSEGV); } -int force_sig_fault_to_task(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t) +int force_sig_fault_to_task(int sig, int code, void __user *addr, + struct task_struct *t) { struct kernel_siginfo info; @@ -1729,24 +1728,15 @@ int force_sig_fault_to_task(int sig, int code, void __user *addr info.si_errno = 0; info.si_code = code; info.si_addr = addr; -#ifdef __ia64__ - info.si_imm = imm; - info.si_flags = flags; - info.si_isr = isr; -#endif return force_sig_info_to_task(&info, t, HANDLER_CURRENT); } -int force_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)) +int force_sig_fault(int sig, int code, void __user *addr) { - return force_sig_fault_to_task(sig, code, addr - ___ARCH_SI_IA64(imm, flags, isr), current); + return force_sig_fault_to_task(sig, code, addr, current); } -int send_sig_fault(int sig, int code, void __user *addr - ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) - , struct task_struct *t) +int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t) { struct kernel_siginfo info; @@ -1755,11 +1745,6 @@ int send_sig_fault(int sig, int code, void __user *addr info.si_errno = 0; info.si_code = code; info.si_addr = addr; -#ifdef __ia64__ - info.si_imm = imm; - info.si_flags = flags; - info.si_isr = isr; -#endif return send_sig_info(info.si_signo, &info, t); } -- cgit v1.2.3 From b089ea3cc30de85ea7e20aa66500feb4082dfbf7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 13 Jan 2023 18:08:32 +0100 Subject: lib/raid6: Drop IA64 support Drop Itanium support from the RAID6 code, and along with it, the 16x and 32x unrolled versions, which were 
only used by IA64. Signed-off-by: Ard Biesheuvel --- include/linux/raid/pq.h | 2 -- lib/raid6/Makefile | 4 ++-- lib/raid6/algos.c | 4 ---- lib/raid6/int.uc | 9 --------- 4 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 006e18decfad..98030accf641 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -84,8 +84,6 @@ extern const struct raid6_calls raid6_intx1; extern const struct raid6_calls raid6_intx2; extern const struct raid6_calls raid6_intx4; extern const struct raid6_calls raid6_intx8; -extern const struct raid6_calls raid6_intx16; -extern const struct raid6_calls raid6_intx32; extern const struct raid6_calls raid6_mmxx1; extern const struct raid6_calls raid6_mmxx2; extern const struct raid6_calls raid6_sse1x1; diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 035b0a4db476..1c5420ff254e 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ - int8.o int16.o int32.o + int8.o raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ @@ -55,7 +55,7 @@ endif quiet_cmd_unroll = UNROLL $@ cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@ -targets += int1.c int2.c int4.c int8.c int16.c int32.c +targets += int1.c int2.c int4.c int8.c $(obj)/int%.c: $(src)/int.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 0ec534faf019..cd2e88ee1f14 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -80,10 +80,6 @@ const struct raid6_calls * const raid6_algos[] = { #ifdef CONFIG_CPU_HAS_LSX &raid6_lsx, #endif -#endif -#if defined(__ia64__) - &raid6_intx32, - &raid6_intx16, #endif &raid6_intx8, &raid6_intx4, diff --git a/lib/raid6/int.uc 
b/lib/raid6/int.uc index 558aeac9342a..1ba56c3fa482 100644 --- a/lib/raid6/int.uc +++ b/lib/raid6/int.uc @@ -41,13 +41,6 @@ typedef u32 unative_t; -/* - * IA-64 wants insane amounts of unrolling. On other architectures that - * is just a waste of space. - */ -#if ($# <= 8) || defined(__ia64__) - - /* * These sub-operations are separate inlines since they can sometimes be * specially optimized using architecture-specific hacks. @@ -152,5 +145,3 @@ const struct raid6_calls raid6_intx$# = { "int" NSTRING "x$#", 0 }; - -#endif -- cgit v1.2.3 From f42dafe3da0cd887c9d2aaa59576f2a92ee4d876 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Sun, 3 Sep 2023 21:06:57 +0200 Subject: gpiolib: unexport gpiod_set_transitory() There are no and never have been any users of gpiod_set_transitory() outside the core GPIOLIB code. Make it private. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- drivers/gpio/gpiolib.c | 1 - drivers/gpio/gpiolib.h | 2 ++ include/linux/gpio/consumer.h | 8 -------- 3 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 40a0022ea719..edffa0d2acaa 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2700,7 +2700,6 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) PIN_CONFIG_PERSIST_STATE, !transitory); } -EXPORT_SYMBOL_GPL(gpiod_set_transitory); /** * gpiod_is_active_low - test whether a GPIO is active-low or not diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index a0a67569300b..d1c94bd571c6 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -144,6 +144,8 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep, struct gpio_array *array_info, unsigned long *value_bitmap); +int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); + extern spinlock_t gpio_lock; extern struct list_head gpio_devices; diff --git a/include/linux/gpio/consumer.h 
b/include/linux/gpio/consumer.h index 1c4385a00f88..6cc345440a5b 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -159,7 +159,6 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, int gpiod_set_config(struct gpio_desc *desc, unsigned long config); int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce); -int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); void gpiod_toggle_active_low(struct gpio_desc *desc); int gpiod_is_active_low(const struct gpio_desc *desc); @@ -494,13 +493,6 @@ static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned int deboun return -ENOSYS; } -static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) -{ - /* GPIO can never have been requested */ - WARN_ON(desc); - return -ENOSYS; -} - static inline void gpiod_toggle_active_low(struct gpio_desc *desc) { /* GPIO can never have been requested */ -- cgit v1.2.3 From 8de54392b849a612f337044d81d9859ee95ab871 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 1 Sep 2023 13:35:20 +0200 Subject: gpiolib: remove stray newline in gpio/driver.h Fix a double newline in the GPIO provider header. 
Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4f0c5d62c8f3..1571cfca65e7 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -764,7 +764,6 @@ void gpiochip_free_own_desc(struct gpio_desc *desc); int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); - struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 37d42ab3924919652858f836a80ab49ec7d11f1e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 1 Sep 2023 13:34:58 +0200 Subject: gpiolib: remove unnecessary extern specifiers from the driver header 'extern' doesn't do anything for function declarations. Remove it. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij --- include/linux/gpio/driver.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1571cfca65e7..b721422f4bfa 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -529,8 +529,7 @@ struct gpio_chip { #endif /* CONFIG_OF_GPIO */ }; -extern const char *gpiochip_is_requested(struct gpio_chip *gc, - unsigned int offset); +const char *gpiochip_is_requested(struct gpio_chip *gc, unsigned int offset); /** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range @@ -549,9 +548,9 @@ extern const char *gpiochip_is_requested(struct gpio_chip *gc, for_each_requested_gpio_in_range(chip, i, 0, chip->ngpio, label) /* add/remove chips */ -extern int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +int 
gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -599,13 +598,13 @@ static inline int gpiochip_add(struct gpio_chip *gc) { return gpiochip_add_data(gc, NULL); } -extern void gpiochip_remove(struct gpio_chip *gc); -extern int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, - struct lock_class_key *lock_key, - struct lock_class_key *request_key); +void gpiochip_remove(struct gpio_chip *gc); +int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, + void *data, struct lock_class_key *lock_key, + struct lock_class_key *request_key); -extern struct gpio_chip *gpiochip_find(void *data, - int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_chip *gpiochip_find(void *data, + int (*match)(struct gpio_chip *gc, void *data)); bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); -- cgit v1.2.3 From 2ae5c9248e06dac2c2360be26b4e25f673238337 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 31 Aug 2023 11:22:58 -0700 Subject: wifi: mac80211: Use flexible array in struct ieee80211_tim_ie Currently struct ieee80211_tim_ie defines: u8 virtual_map[1]; Per the guidance in [1] change this to be a flexible array. Per the discussion in [2] wrap the virtual_map in a union with a u8 item in order to preserve the existing expectation that the virtual_map must contain at least one octet (at least when used in a non-S1G PPDU). This means that no driver changes are required. 
[1] https://docs.kernel.org/process/deprecated.html#zero-length-and-one-element-arrays [2] https://lore.kernel.org/linux-wireless/202308301529.AC90A9EF98@keescook/ Suggested-by: Kees Cook Signed-off-by: Jeff Johnson Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230831-ieee80211_tim_ie-v3-2-e10ff584ab5d@quicinc.com [add wifi prefix] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index bd2f6e19c357..340d7e0f6bf7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -951,17 +951,24 @@ struct ieee80211_wide_bw_chansw_ie { * @dtim_count: DTIM Count * @dtim_period: DTIM Period * @bitmap_ctrl: Bitmap Control + * @required_octet: "Syntatic sugar" to force the struct size to the + * minimum valid size when carried in a non-S1G PPDU * @virtual_map: Partial Virtual Bitmap * * This structure represents the payload of the "TIM element" as - * described in IEEE Std 802.11-2020 section 9.4.2.5. + * described in IEEE Std 802.11-2020 section 9.4.2.5. Note that this + * definition is only applicable when the element is carried in a + * non-S1G PPDU. When the TIM is carried in an S1G PPDU, the Bitmap + * Control and Partial Virtual Bitmap may not be present. */ struct ieee80211_tim_ie { u8 dtim_count; u8 dtim_period; u8 bitmap_ctrl; - /* variable size: 1 - 251 bytes */ - u8 virtual_map[1]; + union { + u8 required_octet; + DECLARE_FLEX_ARRAY(u8, virtual_map); + }; } __packed; /** -- cgit v1.2.3 From 0d423c4a78984dd02f6596d6fd9dd40446eec517 Mon Sep 17 00:00:00 2001 From: Alexey Romanov Date: Wed, 30 Aug 2023 17:08:50 +0300 Subject: drivers: meson: sm: correct meson_sm_* API retval handling 1. Following the ARM SMC32 calling convention, the return value from secure monitor is a 32-bit signed integer. This patch changes the type of the return value of the function meson_sm_call(). 2. 
Now, when meson_sm_call() returns a 32-bit signed integer, we need to ensure that this value is not negative. It is important to check that the return value is not negative in both the meson_sm_call_read() and meson_sm_call_write() functions. 3. Add a comment explaining why it is necessary to check if the SMC return value is equal to 0 in the function meson_sm_call_read(). It is not obvious when reading this code. Signed-off-by: Alexey Romanov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20230830140850.17130-1-avromanov@salutedevices.com Signed-off-by: Neil Armstrong --- drivers/firmware/meson/meson_sm.c | 20 +++++++++++++------- include/linux/firmware/meson/meson_sm.h | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/meson/meson_sm.c b/drivers/firmware/meson/meson_sm.c index 9a2656d73600..53bf56e18743 100644 --- a/drivers/firmware/meson/meson_sm.c +++ b/drivers/firmware/meson/meson_sm.c @@ -67,7 +67,7 @@ static u32 meson_sm_get_cmd(const struct meson_sm_chip *chip, return cmd->smc_id; } -static u32 __meson_sm_call(u32 cmd, u32 arg0, u32 arg1, u32 arg2, +static s32 __meson_sm_call(u32 cmd, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) { struct arm_smccc_res res; @@ -102,9 +102,10 @@ static void __iomem *meson_sm_map_shmem(u32 cmd_shmem, unsigned int size) * Return: 0 on success, a negative value on error */ int meson_sm_call(struct meson_sm_firmware *fw, unsigned int cmd_index, - u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) + s32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) { - u32 cmd, lret; + u32 cmd; + s32 lret; if (!fw->chip) return -ENOENT; @@ -143,7 +144,7 @@ int meson_sm_call_read(struct meson_sm_firmware *fw, void *buffer, unsigned int bsize, unsigned int cmd_index, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) { - u32 size; + s32 size; int ret; if (!fw->chip) @@ -158,11 +159,16 @@ int meson_sm_call_read(struct meson_sm_firmware *fw, void 
*buffer, if (meson_sm_call(fw, cmd_index, &size, arg0, arg1, arg2, arg3, arg4) < 0) return -EINVAL; - if (size > bsize) + if (size < 0 || size > bsize) return -EINVAL; ret = size; + /* In some cases (for example GET_CHIP_ID command), + * SMC doesn't return the number of bytes read, even + * though the bytes were actually read into sm_shmem_out. + * So this check is needed. + */ if (!size) size = bsize; @@ -192,7 +198,7 @@ int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer, unsigned int size, unsigned int cmd_index, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) { - u32 written; + s32 written; if (!fw->chip) return -ENOENT; @@ -208,7 +214,7 @@ int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer, if (meson_sm_call(fw, cmd_index, &written, arg0, arg1, arg2, arg3, arg4) < 0) return -EINVAL; - if (!written) + if (written <= 0 || written > size) return -EINVAL; return written; diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 95b0da2326a9..8eaf8922ab02 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -19,7 +19,7 @@ enum { struct meson_sm_firmware; int meson_sm_call(struct meson_sm_firmware *fw, unsigned int cmd_index, - u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); + s32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer, unsigned int b_size, unsigned int cmd_index, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); -- cgit v1.2.3 From 3e15dcf77b23b8e9b9b7f3c0d4def8fe9c12c534 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 8 Sep 2023 16:28:59 +0300 Subject: fs: rename __mnt_{want,drop}_write*() helpers Before exporting these helpers to modules, make their names more meaningful. 
The names mnt_{get,put}_write_access*() were chosen, because they rhyme with the inode {get,put}_write_access() helpers, which have a very close meaning for the inode object. Suggested-by: Christian Brauner Link: https://lore.kernel.org/r/20230817-anfechtbar-ruhelosigkeit-8c6cca8443fc@brauner/ Signed-off-by: Amir Goldstein Message-Id: <20230908132900.2983519-2-amir73il@gmail.com> Signed-off-by: Christian Brauner --- fs/inode.c | 8 ++++---- fs/internal.h | 12 ++++++------ fs/namespace.c | 34 +++++++++++++++++----------------- fs/open.c | 2 +- include/linux/mount.h | 4 ++-- kernel/acct.c | 4 ++-- 6 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 35fd688168c5..7febdc9fd1a9 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2006,7 +2006,7 @@ void touch_atime(const struct path *path) if (!sb_start_write_trylock(inode->i_sb)) return; - if (__mnt_want_write(mnt) != 0) + if (mnt_get_write_access(mnt) != 0) goto skip_update; /* * File systems can error out when updating inodes if they need to @@ -2018,7 +2018,7 @@ void touch_atime(const struct path *path) * of the fs read only, e.g. subvolumes in Btrfs. 
*/ inode_update_time(inode, S_ATIME); - __mnt_drop_write(mnt); + mnt_put_write_access(mnt); skip_update: sb_end_write(inode->i_sb); } @@ -2173,9 +2173,9 @@ static int __file_update_time(struct file *file, int sync_mode) struct inode *inode = file_inode(file); /* try to update time settings */ - if (!__mnt_want_write_file(file)) { + if (!mnt_get_write_access_file(file)) { ret = inode_update_time(inode, sync_mode); - __mnt_drop_write_file(file); + mnt_put_write_access_file(file); } return ret; diff --git a/fs/internal.h b/fs/internal.h index d64ae03998cc..8260c738980c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -73,8 +73,8 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); -extern int __mnt_want_write_file(struct file *); -extern void __mnt_drop_write_file(struct file *); +int mnt_get_write_access_file(struct file *file); +void mnt_put_write_access_file(struct file *file); extern void dissolve_on_fput(struct vfsmount *); extern bool may_mount(void); @@ -101,7 +101,7 @@ static inline void put_file_access(struct file *file) i_readcount_dec(file->f_inode); } else if (file->f_mode & FMODE_WRITER) { put_write_access(file->f_inode); - __mnt_drop_write(file->f_path.mnt); + mnt_put_write_access(file->f_path.mnt); } } @@ -130,9 +130,9 @@ static inline void sb_start_ro_state_change(struct super_block *sb) * mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY * cleared, it will see s_readonly_remount set. * For RW->RO transition, the barrier pairs with the barrier in - * __mnt_want_write() before the mnt_is_readonly() check. The barrier - * makes sure if __mnt_want_write() sees MNT_WRITE_HOLD already - * cleared, it will see s_readonly_remount set. + * mnt_get_write_access() before the mnt_is_readonly() check. + * The barrier makes sure if mnt_get_write_access() sees MNT_WRITE_HOLD + * already cleared, it will see s_readonly_remount set. 
*/ smp_wmb(); } diff --git a/fs/namespace.c b/fs/namespace.c index e157efc54023..3fe7c0484e6a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -330,16 +330,16 @@ static int mnt_is_readonly(struct vfsmount *mnt) * can determine when writes are able to occur to a filesystem. */ /** - * __mnt_want_write - get write access to a mount without freeze protection + * mnt_get_write_access - get write access to a mount without freeze protection * @m: the mount on which to take a write * * This tells the low-level filesystem that a write is about to be performed to * it, and makes sure that writes are allowed (mnt it read-write) before * returning success. This operation does not protect against filesystem being - * frozen. When the write operation is finished, __mnt_drop_write() must be + * frozen. When the write operation is finished, mnt_put_write_access() must be * called. This is effectively a refcount. */ -int __mnt_want_write(struct vfsmount *m) +int mnt_get_write_access(struct vfsmount *m) { struct mount *mnt = real_mount(m); int ret = 0; @@ -401,7 +401,7 @@ int mnt_want_write(struct vfsmount *m) int ret; sb_start_write(m->mnt_sb); - ret = __mnt_want_write(m); + ret = mnt_get_write_access(m); if (ret) sb_end_write(m->mnt_sb); return ret; @@ -409,15 +409,15 @@ int mnt_want_write(struct vfsmount *m) EXPORT_SYMBOL_GPL(mnt_want_write); /** - * __mnt_want_write_file - get write access to a file's mount + * mnt_get_write_access_file - get write access to a file's mount * @file: the file who's mount on which to take a write * - * This is like __mnt_want_write, but if the file is already open for writing it + * This is like mnt_get_write_access, but if @file is already open for write it * skips incrementing mnt_writers (since the open file already has a reference) * and instead only does the check for emergency r/o remounts. This must be - * paired with __mnt_drop_write_file. + * paired with mnt_put_write_access_file. 
*/ -int __mnt_want_write_file(struct file *file) +int mnt_get_write_access_file(struct file *file) { if (file->f_mode & FMODE_WRITER) { /* @@ -428,7 +428,7 @@ int __mnt_want_write_file(struct file *file) return -EROFS; return 0; } - return __mnt_want_write(file->f_path.mnt); + return mnt_get_write_access(file->f_path.mnt); } /** @@ -445,7 +445,7 @@ int mnt_want_write_file(struct file *file) int ret; sb_start_write(file_inode(file)->i_sb); - ret = __mnt_want_write_file(file); + ret = mnt_get_write_access_file(file); if (ret) sb_end_write(file_inode(file)->i_sb); return ret; @@ -453,14 +453,14 @@ int mnt_want_write_file(struct file *file) EXPORT_SYMBOL_GPL(mnt_want_write_file); /** - * __mnt_drop_write - give up write access to a mount + * mnt_put_write_access - give up write access to a mount * @mnt: the mount on which to give up write access * * Tells the low-level filesystem that we are done * performing writes to it. Must be matched with - * __mnt_want_write() call above. + * mnt_get_write_access() call above. 
*/ -void __mnt_drop_write(struct vfsmount *mnt) +void mnt_put_write_access(struct vfsmount *mnt) { preempt_disable(); mnt_dec_writers(real_mount(mnt)); @@ -477,20 +477,20 @@ void __mnt_drop_write(struct vfsmount *mnt) */ void mnt_drop_write(struct vfsmount *mnt) { - __mnt_drop_write(mnt); + mnt_put_write_access(mnt); sb_end_write(mnt->mnt_sb); } EXPORT_SYMBOL_GPL(mnt_drop_write); -void __mnt_drop_write_file(struct file *file) +void mnt_put_write_access_file(struct file *file) { if (!(file->f_mode & FMODE_WRITER)) - __mnt_drop_write(file->f_path.mnt); + mnt_put_write_access(file->f_path.mnt); } void mnt_drop_write_file(struct file *file) { - __mnt_drop_write_file(file); + mnt_put_write_access_file(file); sb_end_write(file_inode(file)->i_sb); } EXPORT_SYMBOL(mnt_drop_write_file); diff --git a/fs/open.c b/fs/open.c index 98f6601fbac6..a65ce47810cf 100644 --- a/fs/open.c +++ b/fs/open.c @@ -895,7 +895,7 @@ static int do_dentry_open(struct file *f, error = get_write_access(inode); if (unlikely(error)) goto cleanup_file; - error = __mnt_want_write(f->f_path.mnt); + error = mnt_get_write_access(f->f_path.mnt); if (unlikely(error)) { put_write_access(inode); goto cleanup_file; diff --git a/include/linux/mount.h b/include/linux/mount.h index 4f40b40306d0..ac3dd2876197 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -92,8 +92,8 @@ extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); extern struct vfsmount *clone_private_mount(const struct path *path); -extern int __mnt_want_write(struct vfsmount *); -extern void __mnt_drop_write(struct vfsmount *); +int mnt_get_write_access(struct vfsmount *mnt); +void mnt_put_write_access(struct vfsmount *mnt); extern struct vfsmount *fc_mount(struct fs_context *fc); extern struct vfsmount *vfs_create_mount(struct fs_context *fc); diff --git a/kernel/acct.c b/kernel/acct.c index 1a9f929fe629..986c8214dabf 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -246,7 +246,7 @@ 
static int acct_on(struct filename *pathname) filp_close(file, NULL); return PTR_ERR(internal); } - err = __mnt_want_write(internal); + err = mnt_get_write_access(internal); if (err) { mntput(internal); kfree(acct); @@ -271,7 +271,7 @@ static int acct_on(struct filename *pathname) old = xchg(&ns->bacct, &acct->pin); mutex_unlock(&acct->lock); pin_kill(old); - __mnt_drop_write(mnt); + mnt_put_write_access(mnt); mntput(mnt); return 0; } -- cgit v1.2.3 From fa671e4f1556e2c18e5443f777a75ae041290068 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 7 Sep 2023 10:52:03 +0200 Subject: fbdev/core: Unexport logo helpers The interfaces for the fbdev logo are not used outside of the fbdev module. Hence declare the fbdev logo functions in the internal header file and remove their symbol exports. Only build the functions if CONFIG_LOGO has been selected. Signed-off-by: Thomas Zimmermann Acked-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20230907085408.9354-5-tzimmermann@suse.de --- drivers/video/fbdev/core/fb_internal.h | 16 ++++++++++++++++ drivers/video/fbdev/core/fbmem.c | 5 ----- include/linux/fb.h | 5 ----- 3 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fb_internal.h b/drivers/video/fbdev/core/fb_internal.h index 4c8d509a0026..1116faefa034 100644 --- a/drivers/video/fbdev/core/fb_internal.h +++ b/drivers/video/fbdev/core/fb_internal.h @@ -21,6 +21,22 @@ static inline void fb_unregister_chrdev(void) #endif /* fbmem.c */ +#if defined(CONFIG_LOGO) +extern bool fb_center_logo; +extern int fb_logo_count; +int fb_prepare_logo(struct fb_info *fb_info, int rotate); +int fb_show_logo(struct fb_info *fb_info, int rotate); +#else +static inline int fb_prepare_logo(struct fb_info *info, int rotate) +{ + return 0; +} +static inline int fb_show_logo(struct fb_info *info, int rotate) +{ + return 0; +} +#endif /* CONFIG_LOGO */ + extern struct class *fb_class; extern struct 
mutex registration_lock; extern struct fb_info *registered_fb[FB_MAX]; diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 98e1847e4287..ee25ac38737d 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -696,12 +696,7 @@ int fb_show_logo(struct fb_info *info, int rotate) return y; } -#else -int fb_prepare_logo(struct fb_info *info, int rotate) { return 0; } -int fb_show_logo(struct fb_info *info, int rotate) { return 0; } #endif /* CONFIG_LOGO */ -EXPORT_SYMBOL(fb_prepare_logo); -EXPORT_SYMBOL(fb_show_logo); int fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var) diff --git a/include/linux/fb.h b/include/linux/fb.h index 16c3e6d6c55d..d110676c9c83 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -591,8 +591,6 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, /* drivers/video/fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); -extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); -extern int fb_show_logo(struct fb_info *fb_info, int rotate); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); @@ -603,9 +601,6 @@ extern int fb_get_color_depth(struct fb_var_screeninfo *var, extern int fb_get_options(const char *name, char **option); extern int fb_new_modelist(struct fb_info *info); -extern bool fb_center_logo; -extern int fb_logo_count; - static inline void lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); -- cgit v1.2.3 From ebc7abb35b258152d4a424f89d7c03db1d7ce61c Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 7 Sep 2023 20:18:56 +0200 Subject: thermal: Constify the trip argument of the .get_trend() zone callback Add 'const' to the definition of the 'trip' argument of the .get_trend() thermal zone callback to indicate that the trip point passed to it should not be modified by it and adjust the callback functions implementing it, thermal_get_trend() in the ACPI thermal driver and __ti_thermal_get_trend(), accordingly. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Michal Wilczynski --- drivers/acpi/thermal.c | 2 +- drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 3 ++- include/linux/thermal.h | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index f14e68266ccd..312730f8272e 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -492,7 +492,7 @@ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp) } static int thermal_get_trend(struct thermal_zone_device *thermal, - struct thermal_trip *trip, + const struct thermal_trip *trip, enum thermal_trend *trend) { struct acpi_thermal *tz = thermal_zone_device_priv(thermal); diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index 6ba2613627e1..0cf0826b805a 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -110,7 +110,8 @@ static inline int __ti_thermal_get_temp(struct thermal_zone_device *tz, int *tem } static int __ti_thermal_get_trend(struct thermal_zone_device *tz, - struct thermal_trip *trip, enum thermal_trend *trend) + const struct thermal_trip *trip, + enum thermal_trend *trend) { struct ti_thermal_data *data = thermal_zone_device_priv(tz); struct ti_bandgap *bgp; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index c99440aac1a1..a5ae4af955ff 100644 --- a/include/linux/thermal.h +++ 
b/include/linux/thermal.h @@ -80,8 +80,8 @@ struct thermal_zone_device_ops { int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); - int (*get_trend) (struct thermal_zone_device *, struct thermal_trip *, - enum thermal_trend *); + int (*get_trend) (struct thermal_zone_device *, + const struct thermal_trip *, enum thermal_trend *); void (*hot)(struct thermal_zone_device *); void (*critical)(struct thermal_zone_device *); }; -- cgit v1.2.3 From ef7d9593390a050c50eba5fc02d2cb65a1104434 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 11 Sep 2023 08:39:04 -0700 Subject: xfs: remove CPU hotplug infrastructure There are no users of the cpu hotplug hooks in xfs now, so remove it. This reverts f1653c2e2831e ("xfs: introduce CPU hotplug infrastructure"). Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_super.c | 42 +----------------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 1 insertion(+), 42 deletions(-) (limited to 'include/linux') diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 5cced7713cd2..c8a2dae1dd65 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2294,39 +2294,6 @@ xfs_destroy_workqueues(void) destroy_workqueue(xfs_alloc_wq); } -#ifdef CONFIG_HOTPLUG_CPU -static int -xfs_cpu_dead( - unsigned int cpu) -{ - return 0; -} - -static int __init -xfs_cpu_hotplug_init(void) -{ - int error; - - error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL, - xfs_cpu_dead); - if (error < 0) - xfs_alert(NULL, -"Failed to initialise CPU hotplug, error %d. 
XFS is non-functional.", - error); - return error; -} - -static void -xfs_cpu_hotplug_destroy(void) -{ - cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD); -} - -#else /* !CONFIG_HOTPLUG_CPU */ -static inline int xfs_cpu_hotplug_init(void) { return 0; } -static inline void xfs_cpu_hotplug_destroy(void) {} -#endif - STATIC int __init init_xfs_fs(void) { @@ -2343,13 +2310,9 @@ init_xfs_fs(void) xfs_dir_startup(); - error = xfs_cpu_hotplug_init(); - if (error) - goto out; - error = xfs_init_caches(); if (error) - goto out_destroy_hp; + goto out; error = xfs_init_workqueues(); if (error) @@ -2433,8 +2396,6 @@ init_xfs_fs(void) xfs_destroy_workqueues(); out_destroy_caches: xfs_destroy_caches(); - out_destroy_hp: - xfs_cpu_hotplug_destroy(); out: return error; } @@ -2458,7 +2419,6 @@ exit_xfs_fs(void) xfs_destroy_workqueues(); xfs_destroy_caches(); xfs_uuid_table_free(); - xfs_cpu_hotplug_destroy(); } module_init(init_xfs_fs); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 06dda85f0424..068f7738be22 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -90,7 +90,6 @@ enum cpuhp_state { CPUHP_FS_BUFF_DEAD, CPUHP_PRINTK_DEAD, CPUHP_MM_MEMCQ_DEAD, - CPUHP_XFS_DEAD, CPUHP_PERCPU_CNT_DEAD, CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC, -- cgit v1.2.3 From 5b404fdabacf4bee92d8c66013402a85f18a6a10 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 15 Aug 2023 15:46:32 -0700 Subject: rcu: Add RCU CPU stall notifier It is sometimes helpful to have a way for the subsystem causing the stall to dump its state when an RCU CPU stall occurs. This commit therefore bases rcu_stall_chain_notifier_register() and rcu_stall_chain_notifier_unregister() on atomic notifiers in order to provide this functionality. Signed-off-by: Paul E. 
McKenney Cc: Steven Rostedt Signed-off-by: Frederic Weisbecker --- include/linux/rcu_notifier.h | 32 ++++++++++++++++++++++++ kernel/rcu/rcu.h | 6 +++++ kernel/rcu/tree_exp.h | 6 ++++- kernel/rcu/tree_stall.h | 59 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 include/linux/rcu_notifier.h (limited to 'include/linux') diff --git a/include/linux/rcu_notifier.h b/include/linux/rcu_notifier.h new file mode 100644 index 000000000000..ebf371364581 --- /dev/null +++ b/include/linux/rcu_notifier.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Read-Copy Update notifiers, initially RCU CPU stall notifier. + * Separate from rcupdate.h to avoid #include loops. + * + * Copyright (C) 2023 Paul E. McKenney. + */ + +#ifndef __LINUX_RCU_NOTIFIER_H +#define __LINUX_RCU_NOTIFIER_H + +// Actions for RCU CPU stall notifier calls. +#define RCU_STALL_NOTIFY_NORM 1 +#define RCU_STALL_NOTIFY_EXP 2 + +#ifdef CONFIG_RCU_STALL_COMMON + +#include +#include + +int rcu_stall_chain_notifier_register(struct notifier_block *n); +int rcu_stall_chain_notifier_unregister(struct notifier_block *n); + +#else // #ifdef CONFIG_RCU_STALL_COMMON + +// No RCU CPU stall warnings in Tiny RCU. 
+static inline int rcu_stall_chain_notifier_register(struct notifier_block *n) { return -EEXIST; } +static inline int rcu_stall_chain_notifier_unregister(struct notifier_block *n) { return -ENOENT; } + +#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON + +#endif /* __LINUX_RCU_NOTIFIER_H */ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 98e13be411af..ef3bab977407 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -654,4 +654,10 @@ static inline bool rcu_cpu_beenfullyonline(int cpu) { return true; } bool rcu_cpu_beenfullyonline(int cpu); #endif +#ifdef CONFIG_RCU_STALL_COMMON +int rcu_stall_notifier_call_chain(unsigned long val, void *v); +#else // #ifdef CONFIG_RCU_STALL_COMMON +static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; } +#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 8239b39d945b..6d7cea5d591f 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -621,10 +621,14 @@ static void synchronize_rcu_expedited_wait(void) } for (;;) { + unsigned long j; + if (synchronize_rcu_expedited_wait_once(jiffies_stall)) return; if (rcu_stall_is_suppressed()) continue; + j = jiffies; + rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_EXP, (void *)(j - jiffies_start)); trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall")); pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {", rcu_state.name); @@ -647,7 +651,7 @@ static void synchronize_rcu_expedited_wait(void) } } pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", - jiffies - jiffies_start, rcu_state.expedited_sequence, + j - jiffies_start, rcu_state.expedited_sequence, data_race(rnp_root->expmask), ".T"[!!data_race(rnp_root->exp_tasks)]); if (ndetected) { diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 2443d1d4a6dc..49544f932279 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -8,6 +8,7 @@ */ #include 
+#include ////////////////////////////////////////////////////////////////////////////// // @@ -770,6 +771,7 @@ static void check_cpu_stall(struct rcu_data *rdp) if (kvm_check_and_clear_guest_paused()) return; + rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps); if (self_detected) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(gps); @@ -790,7 +792,7 @@ static void check_cpu_stall(struct rcu_data *rdp) ////////////////////////////////////////////////////////////////////////////// // -// RCU forward-progress mechanisms, including of callback invocation. +// RCU forward-progress mechanisms, including for callback invocation. /* @@ -1042,3 +1044,58 @@ static int __init rcu_sysrq_init(void) return 0; } early_initcall(rcu_sysrq_init); + + +////////////////////////////////////////////////////////////////////////////// +// +// RCU CPU stall-warning notifiers + +static ATOMIC_NOTIFIER_HEAD(rcu_cpu_stall_notifier_list); + +/** + * rcu_stall_chain_notifier_register - Add an RCU CPU stall notifier + * @n: Entry to add. + * + * Adds an RCU CPU stall notifier to an atomic notifier chain. + * The @action passed to a notifier will be @RCU_STALL_NOTIFY_NORM or + * friends. The @data will be the duration of the stalled grace period, + * in jiffies, coerced to a void* pointer. + * + * Returns 0 on success, %-EEXIST on error. + */ +int rcu_stall_chain_notifier_register(struct notifier_block *n) +{ + return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n); +} +EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_register); + +/** + * rcu_stall_chain_notifier_unregister - Remove an RCU CPU stall notifier + * @n: Entry to add. + * + * Removes an RCU CPU stall notifier from an atomic notifier chain. + * + * Returns zero on success, %-ENOENT on failure. 
+ */ +int rcu_stall_chain_notifier_unregister(struct notifier_block *n) +{ + return atomic_notifier_chain_unregister(&rcu_cpu_stall_notifier_list, n); +} +EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_unregister); + +/* + * rcu_stall_notifier_call_chain - Call functions in an RCU CPU stall notifier chain + * @val: Value passed unmodified to notifier function + * @v: Pointer passed unmodified to notifier function + * + * Calls each function in the RCU CPU stall notifier chain in turn, which + * is an atomic call chain. See atomic_notifier_call_chain() for more + * information. + * + * This is for use within RCU, hence the omission of the extra asterisk + * to indicate a non-kerneldoc format header comment. + */ +int rcu_stall_notifier_call_chain(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&rcu_cpu_stall_notifier_list, val, v); +} -- cgit v1.2.3 From fc52a64416b010c8324e2cb50070faae868521c1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 8 Sep 2023 16:39:29 -0400 Subject: tracing/synthetic: Fix order of struct trace_dynamic_info To make handling BIG and LITTLE endian better the offset/len of dynamic fields of the synthetic events was changed into a structure of: struct trace_dynamic_info { #ifdef CONFIG_CPU_BIG_ENDIAN u16 offset; u16 len; #else u16 len; u16 offset; #endif }; to replace the manual changes of: data_offset = offset & 0xffff; data_offset |= len << 16; But if you look closely, the above is: len << 16 | offset Which in little endian would be in memory: offset_lo offset_hi len_lo len_hi and in big endian: len_hi len_lo offset_hi offset_lo Which if broken into a structure would be: struct trace_dynamic_info { #ifdef CONFIG_CPU_BIG_ENDIAN u16 len; u16 offset; #else u16 offset; u16 len; #endif }; Which is the opposite of what was defined. Fix this and just to be safe also add "__packed". 
Link: https://lore.kernel.org/all/20230908154417.5172e343@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20230908163929.2c25f3dc@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Tested-by: Sven Schnelle Acked-by: Masami Hiramatsu (Google) Fixes: ddeea494a16f3 ("tracing/synthetic: Use union instead of casts") Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 12f875e9e69a..21ae37e49319 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -62,13 +62,13 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...); /* Used to find the offset and length of dynamic fields in trace events */ struct trace_dynamic_info { #ifdef CONFIG_CPU_BIG_ENDIAN - u16 offset; u16 len; + u16 offset; #else - u16 len; u16 offset; + u16 len; #endif -}; +} __packed; /* * The trace entry - the most basic unit of tracing. This is what -- cgit v1.2.3 From 49f776724e64c27dd861e7ac8da9d42f01d9d172 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 23 Aug 2023 23:43:04 +0000 Subject: PCI/AER: Export pcie_aer_is_native() Export and move the declaration of pcie_aer_is_native() to a common header file to be reused by cxl/pci module. 
Signed-off-by: Smita Koralahalli Acked-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Robert Richter Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230823234305.27333-3-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams --- drivers/pci/pcie/aer.c | 1 + drivers/pci/pcie/portdrv.h | 2 -- include/linux/aer.h | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index e85ff946e8c8..9c8fd69ae5ad 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -229,6 +229,7 @@ int pcie_aer_is_native(struct pci_dev *dev) return pcie_ports_native || host->native_aer; } +EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, CXL); static int pci_enable_pcie_error_reporting(struct pci_dev *dev) { diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 58a2b1a1cae4..1f3803bde7ee 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -29,10 +29,8 @@ extern bool pcie_ports_dpc_native; #ifdef CONFIG_PCIEAER int pcie_aer_init(void); -int pcie_aer_is_native(struct pci_dev *dev); #else static inline int pcie_aer_init(void) { return 0; } -static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif #ifdef CONFIG_HOTPLUG_PCI_PCIE diff --git a/include/linux/aer.h b/include/linux/aer.h index 2dd175f5debd..29cc10220952 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -42,11 +42,13 @@ struct aer_capability_regs { #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); +int pcie_aer_is_native(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } +static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif void cper_print_aer(struct pci_dev *dev, int aer_severity, -- cgit v1.2.3 From 2b69987be575b92adb6c177679f3c559134f0d8f Mon Sep 17 00:00:00 2001 
From: Kent Overstreet Date: Wed, 16 Oct 2019 15:03:50 -0400 Subject: sched: Add task_struct->faults_disabled_mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There has been a long standing page cache coherence bug with direct IO. This provides part of a mechanism to fix it, currently just used by bcachefs but potentially worth promoting to the VFS. Direct IO evicts the range of the pagecache being read or written to. For reads, we need dirty pages to be written to disk, so that the read doesn't return stale data. For writes, we need to evict that range of the pagecache so that it's not stale after the write completes. However, without a locking mechanism to prevent those pages from being re-added to the pagecache - by a buffered read or page fault - page cache inconsistency is still possible. This isn't necessarily just an issue for userspace when they're playing games; filesystems may hang arbitrary state off the pagecache, and so page cache inconsistency may cause real filesystem bugs, depending on the filesystem. This is less of an issue for iomap based filesystems, but e.g. buffer heads caches disk block mappings (!) and attaches them to the pagecache, and bcachefs attaches disk reservations to pagecache pages. This issue has been hard to fix, because - we need to add a lock (henceforth called pagecache_add_lock), which would be held for the duration of the direct IO - page faults add pages to the page cache, thus need to take the same lock - dio -> gup -> page fault thus can deadlock And we cannot enforce a lock ordering with this lock, since userspace will be controlling the lock ordering (via the fd and buffer arguments to direct IOs), so we need a different method of deadlock avoidance. We need to tell the page fault handler that we're already holding a pagecache_add_lock, and since plumbing it through the entire gup() path would be highly impractical this adds a field to task_struct. 
Then the full method is: - in the dio path, when we first take the pagecache_add_lock, note the mapping in the current task_struct - in the page fault handler, if faults_disabled_mapping is set, we check if it's the same mapping as the one we're taking a page fault for, and if so return an error. Then we check lock ordering: if there's a lock ordering violation and trylock fails, we'll have to cycle the locks and return an error that tells the DIO path to retry: faults_disabled_mapping is also used for signalling "locks were dropped, please retry". Also relevant to this patch: mapping->invalidate_lock. mapping->invalidate_lock provides most of the required semantics - it's used by truncate/fallocate to block pages being added to the pagecache. However, since it's a rwsem, direct IOs would need to take the write side in order to block page cache adds, and would then be exclusive with each other - we'll need a new type of lock to pair with this approach. Signed-off-by: Kent Overstreet Cc: Jan Kara Cc: Darrick J. 
Wong Cc: linux-fsdevel@vger.kernel.org Cc: Andreas Grünbacher --- include/linux/sched.h | 1 + init/init_task.c | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..d5951e99706a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -875,6 +875,7 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; + struct address_space *faults_disabled_mapping; int exit_state; int exit_code; diff --git a/init/init_task.c b/init/init_task.c index ff6c4b9bfe6b..f703116e0523 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -85,6 +85,7 @@ struct task_struct init_task .nr_cpus_allowed= NR_CPUS, .mm = NULL, .active_mm = &init_mm, + .faults_disabled_mapping = NULL, .restart_block = { .fn = do_no_restart_syscall, }, -- cgit v1.2.3 From 771eb4fe8b420bb8563863e242861e635c742bc2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 9 Jul 2018 23:27:33 -0400 Subject: fs: factor out d_mark_tmpfile() New helper for bcachefs - bcachefs doesn't want the inode_dec_link_count() call that d_tmpfile does, it handles i_nlink on its own atomically with other btree updates Signed-off-by: Kent Overstreet Cc: Alexander Viro Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Darrick J. 
Wong Reviewed-by: Christian Brauner --- fs/dcache.c | 12 ++++++++++-- include/linux/dcache.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 25ac74d30bff..796e23761ba0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3246,11 +3246,10 @@ void d_genocide(struct dentry *parent) d_walk(parent, parent, d_genocide_kill); } -void d_tmpfile(struct file *file, struct inode *inode) +void d_mark_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; - inode_dec_link_count(inode); BUG_ON(dentry->d_name.name != dentry->d_iname || !hlist_unhashed(&dentry->d_u.d_alias) || !d_unlinked(dentry)); @@ -3260,6 +3259,15 @@ void d_tmpfile(struct file *file, struct inode *inode) (unsigned long long)inode->i_ino); spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_parent->d_lock); +} +EXPORT_SYMBOL(d_mark_tmpfile); + +void d_tmpfile(struct file *file, struct inode *inode) +{ + struct dentry *dentry = file->f_path.dentry; + + inode_dec_link_count(inode); + d_mark_tmpfile(file, inode); d_instantiate(dentry, inode); } EXPORT_SYMBOL(d_tmpfile); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6b351e009f59..3da2f0545d5d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -251,6 +251,7 @@ extern struct dentry * d_make_root(struct inode *); /* - the ramfs-type tree */ extern void d_genocide(struct dentry *); +extern void d_mark_tmpfile(struct file *, struct inode *); extern void d_tmpfile(struct file *, struct inode *); extern struct dentry *d_find_alias(struct inode *); -- cgit v1.2.3 From 83feeb195592b533541ff6399c8084e5b7c59677 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Apr 2022 15:26:28 -0400 Subject: lib/string_helpers: string_get_size() now returns characters wrote printbuf now needs to know the number of characters that would have been written if the buffer was too small, like snprintf(); this changes string_get_size() 
to return the return value of snprintf(). Signed-off-by: Kent Overstreet --- include/linux/string_helpers.h | 4 ++-- lib/string_helpers.c | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 9d1f5bb74dd5..58fb1f90eda5 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -24,8 +24,8 @@ enum string_size_units { STRING_UNITS_2, /* use binary powers of 2^10 */ }; -void string_get_size(u64 size, u64 blk_size, enum string_size_units units, - char *buf, int len); +int string_get_size(u64 size, u64 blk_size, enum string_size_units units, + char *buf, int len); int parse_int_array_user(const char __user *from, size_t count, int **array); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 9982344cca34..7713f73e66b0 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -31,9 +31,11 @@ * giving the size in the required units. @buf should have room for * at least 9 bytes and will always be zero terminated. * + * Return value: number of characters of output that would have been written + * (which may be greater than len, if output was truncated). 
*/ -void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, - char *buf, int len) +int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, + char *buf, int len) { static const char *const units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" @@ -126,8 +128,8 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, else unit = units_str[units][i]; - snprintf(buf, len, "%u%s %s", (u32)size, - tmp, unit); + return snprintf(buf, len, "%u%s %s", (u32)size, + tmp, unit); } EXPORT_SYMBOL(string_get_size); -- cgit v1.2.3 From fe4fa2e4f7d0722c179fffa25911ea35cafadce2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 1 Sep 2023 13:29:25 +0200 Subject: gpiolib: make gpiochip_get_desc() public It makes sense for a GPIO driver to want to get its own descriptor without requesting it. After all, the driver knows that it'll still be valid. Let's move this helper to linux/gpio/driver.h. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij --- drivers/gpio/gpiolib.h | 2 -- include/linux/gpio/driver.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index d1c94bd571c6..9bff5c2cf720 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -122,8 +122,6 @@ struct gpio_array { unsigned long invert_mask[]; }; -struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); - #define for_each_gpio_desc(gc, desc) \ for (unsigned int __i = 0; \ __i < gc->ngpio && (desc = gpiochip_get_desc(gc, __i)); \ diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index b721422f4bfa..8f0859ba7065 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -757,6 +757,8 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc 
*desc); +struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); + #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ -- cgit v1.2.3 From 2c97d3e55b70edf33b6e7f211bab8a748a0a2bcc Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Wed, 30 Aug 2023 15:22:37 +1200 Subject: platform/x86: asus-wmi: add support for ASUS screenpad Add support for the WMI methods used to turn off and adjust the brightness of the secondary "screenpad" device found on some high-end ASUS laptops like the GX650P series and others. There are some small quirks with this device when considering only the raw WMI methods: 1. The Off method can only switch the device off 2. Changing the brightness turns the device back on 3. To turn the device back on the brightness must be > 1 4. When the device is off the brightness can't be changed (so it is stored by the driver if device is off). 5. Booting with a value of 0 brightness (retained by bios) means the bios will set a value of >0 <15 6. When the device is off it is "unplugged" asus_wmi sets the minimum brightness as 20 in general use, and 60 for booting with values <= min. The ACPI methods are used in a new backlight device named asus_screenpad. Signed-off-by: Luke D. 
Jones Link: https://lore.kernel.org/r/20230830032237.42987-2-luke@ljones.dev Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-wmi.c | 133 +++++++++++++++++++++++++++++ drivers/platform/x86/asus-wmi.h | 1 + include/linux/platform_data/x86/asus-wmi.h | 4 + 3 files changed, 138 insertions(+) (limited to 'include/linux') diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 9f8cea5f9615..928fc74e79b4 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -127,6 +128,10 @@ module_param(fnlock_default, bool, 0444); #define NVIDIA_TEMP_MIN 75 #define NVIDIA_TEMP_MAX 87 +#define ASUS_SCREENPAD_BRIGHT_MIN 20 +#define ASUS_SCREENPAD_BRIGHT_MAX 255 +#define ASUS_SCREENPAD_BRIGHT_DEFAULT 60 + static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static int throttle_thermal_policy_write(struct asus_wmi *); @@ -212,6 +217,7 @@ struct asus_wmi { struct input_dev *inputdev; struct backlight_device *backlight_device; + struct backlight_device *screenpad_backlight_device; struct platform_device *platform_device; struct led_classdev wlan_led; @@ -3776,6 +3782,124 @@ static int is_display_toggle(int code) return 0; } +/* Screenpad backlight *******************************************************/ + +static int read_screenpad_backlight_power(struct asus_wmi *asus) +{ + int ret; + + ret = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_SCREENPAD_POWER); + if (ret < 0) + return ret; + /* 1 == powered */ + return ret ? 
FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN; +} + +static int read_screenpad_brightness(struct backlight_device *bd) +{ + struct asus_wmi *asus = bl_get_data(bd); + u32 retval; + int err; + + err = read_screenpad_backlight_power(asus); + if (err < 0) + return err; + /* The device brightness can only be read if powered, so return stored */ + if (err == FB_BLANK_POWERDOWN) + return asus->driver->screenpad_brightness - ASUS_SCREENPAD_BRIGHT_MIN; + + err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_SCREENPAD_LIGHT, &retval); + if (err < 0) + return err; + + return (retval & ASUS_WMI_DSTS_BRIGHTNESS_MASK) - ASUS_SCREENPAD_BRIGHT_MIN; +} + +static int update_screenpad_bl_status(struct backlight_device *bd) +{ + struct asus_wmi *asus = bl_get_data(bd); + int power, err = 0; + u32 ctrl_param; + + power = read_screenpad_backlight_power(asus); + if (power < 0) + return power; + + if (bd->props.power != power) { + if (power != FB_BLANK_UNBLANK) { + /* Only brightness > 0 can power it back on */ + ctrl_param = asus->driver->screenpad_brightness - ASUS_SCREENPAD_BRIGHT_MIN; + err = asus_wmi_set_devstate(ASUS_WMI_DEVID_SCREENPAD_LIGHT, + ctrl_param, NULL); + } else { + err = asus_wmi_set_devstate(ASUS_WMI_DEVID_SCREENPAD_POWER, 0, NULL); + } + } else if (power == FB_BLANK_UNBLANK) { + /* Only set brightness if powered on or we get invalid/unsync state */ + ctrl_param = bd->props.brightness + ASUS_SCREENPAD_BRIGHT_MIN; + err = asus_wmi_set_devstate(ASUS_WMI_DEVID_SCREENPAD_LIGHT, ctrl_param, NULL); + } + + /* Ensure brightness is stored to turn back on with */ + if (err == 0) + asus->driver->screenpad_brightness = bd->props.brightness + ASUS_SCREENPAD_BRIGHT_MIN; + + return err; +} + +static const struct backlight_ops asus_screenpad_bl_ops = { + .get_brightness = read_screenpad_brightness, + .update_status = update_screenpad_bl_status, + .options = BL_CORE_SUSPENDRESUME, +}; + +static int asus_screenpad_init(struct asus_wmi *asus) +{ + struct backlight_device *bd; + struct 
backlight_properties props; + int err, power; + int brightness = 0; + + power = read_screenpad_backlight_power(asus); + if (power < 0) + return power; + + if (power != FB_BLANK_POWERDOWN) { + err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_SCREENPAD_LIGHT, &brightness); + if (err < 0) + return err; + } + /* default to an acceptable min brightness on boot if too low */ + if (brightness < ASUS_SCREENPAD_BRIGHT_MIN) + brightness = ASUS_SCREENPAD_BRIGHT_DEFAULT; + + memset(&props, 0, sizeof(struct backlight_properties)); + props.type = BACKLIGHT_RAW; /* ensure this bd is last to be picked */ + props.max_brightness = ASUS_SCREENPAD_BRIGHT_MAX - ASUS_SCREENPAD_BRIGHT_MIN; + bd = backlight_device_register("asus_screenpad", + &asus->platform_device->dev, asus, + &asus_screenpad_bl_ops, &props); + if (IS_ERR(bd)) { + pr_err("Could not register backlight device\n"); + return PTR_ERR(bd); + } + + asus->screenpad_backlight_device = bd; + asus->driver->screenpad_brightness = brightness; + bd->props.brightness = brightness - ASUS_SCREENPAD_BRIGHT_MIN; + bd->props.power = power; + backlight_update_status(bd); + + return 0; +} + +static void asus_screenpad_exit(struct asus_wmi *asus) +{ + backlight_device_unregister(asus->screenpad_backlight_device); + + asus->screenpad_backlight_device = NULL; +} + /* Fn-lock ********************************************************************/ static bool asus_wmi_has_fnlock_key(struct asus_wmi *asus) @@ -4431,6 +4555,12 @@ static int asus_wmi_add(struct platform_device *pdev) } else if (asus->driver->quirks->wmi_backlight_set_devstate) err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL); + if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_SCREENPAD_LIGHT)) { + err = asus_screenpad_init(asus); + if (err && err != -ENODEV) + goto fail_screenpad; + } + if (asus_wmi_has_fnlock_key(asus)) { asus->fnlock_locked = fnlock_default; asus_wmi_fnlock_update(asus); @@ -4454,6 +4584,8 @@ fail_wmi_handler: asus_wmi_backlight_exit(asus); 
fail_backlight: asus_wmi_rfkill_exit(asus); +fail_screenpad: + asus_screenpad_exit(asus); fail_rfkill: asus_wmi_led_exit(asus); fail_leds: @@ -4480,6 +4612,7 @@ static int asus_wmi_remove(struct platform_device *device) asus = platform_get_drvdata(device); wmi_remove_notify_handler(asus->driver->event_guid); asus_wmi_backlight_exit(asus); + asus_screenpad_exit(asus); asus_wmi_input_exit(asus); asus_wmi_led_exit(asus); asus_wmi_rfkill_exit(asus); diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index a478ebfd34df..5fbdd0eafa02 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -57,6 +57,7 @@ struct quirk_entry { struct asus_wmi_driver { int brightness; int panel_power; + int screenpad_brightness; int wlan_ctrl_by_user; const char *name; diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 16e99a1c37fc..63e630276499 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -58,6 +58,10 @@ #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 +/* This can only be used to disable the screen, not re-enable */ +#define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031 +/* Writing a brightness re-enables the screen if disabled */ +#define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032 #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018 #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075 -- cgit v1.2.3 From 4eaf928622abc5dabc9b42a0de4dafbe29ddf491 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 11 Aug 2023 17:57:01 +0800 Subject: iio: Remove unused declarations Commit 0f3a8c3f34f7 ("iio: Add support for creating IIO devices via configfs") declared but never implemented iio_sw_device_type_configfs_{un}register(). 
Commit b662f809d410 ("iio: core: Introduce IIO software triggers") declared but never implemented iio_sw_trigger_type_configfs_{un}register(). Commit a3e0b51884ee ("iio: accel: add support for FXLS8962AF/FXLS8964AF accelerometers") declared but never implemented fxls8962af_core_remove(). Commit 8dedcc3eee3a ("iio: core: centralize ioctl() calls to the main chardev") declared but never implemented iio_device_ioctl(). Commit d430f3c36ca6 ("iio: imu: inv_mpu6050: Use regmap instead of i2c specific functions") removed inv_mpu6050_write_reg() but not its declaration. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230811095701.35372-1-yuehaibing@huawei.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/fxls8962af.h | 1 - drivers/iio/iio_core.h | 3 --- drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h | 1 - include/linux/iio/sw_device.h | 3 --- include/linux/iio/sw_trigger.h | 3 --- 5 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/accel/fxls8962af.h b/drivers/iio/accel/fxls8962af.h index 9cbe98c3ba9a..6eaa2803b26f 100644 --- a/drivers/iio/accel/fxls8962af.h +++ b/drivers/iio/accel/fxls8962af.h @@ -14,7 +14,6 @@ enum { }; int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq); -int fxls8962af_core_remove(struct device *dev); extern const struct dev_pm_ops fxls8962af_pm_ops; extern const struct regmap_config fxls8962af_i2c_regmap_conf; diff --git a/drivers/iio/iio_core.h b/drivers/iio/iio_core.h index 501e286702ef..1a38b1915e7a 100644 --- a/drivers/iio/iio_core.h +++ b/drivers/iio/iio_core.h @@ -30,9 +30,6 @@ struct iio_ioctl_handler { unsigned int cmd, unsigned long arg); }; -long iio_device_ioctl(struct iio_dev *indio_dev, struct file *filp, - unsigned int cmd, unsigned long arg); - void iio_device_ioctl_handler_register(struct iio_dev *indio_dev, struct iio_ioctl_handler *h); void iio_device_ioctl_handler_unregister(struct iio_ioctl_handler *h); diff --git 
a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h index ed5a96e78df0..95f548235de7 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h @@ -464,7 +464,6 @@ int inv_mpu6050_probe_trigger(struct iio_dev *indio_dev, int irq_type); int inv_mpu6050_prepare_fifo(struct inv_mpu6050_state *st, bool enable); int inv_mpu6050_switch_engine(struct inv_mpu6050_state *st, bool en, unsigned int mask); -int inv_mpu6050_write_reg(struct inv_mpu6050_state *st, int reg, u8 val); int inv_mpu_acpi_create_mux_client(struct i2c_client *client); void inv_mpu_acpi_delete_mux_client(struct i2c_client *client); int inv_mpu_core_probe(struct regmap *regmap, int irq, const char *name, diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index eff1e6b2595c..0f7fe7b522e3 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -51,9 +51,6 @@ void iio_unregister_sw_device_type(struct iio_sw_device_type *dt); struct iio_sw_device *iio_sw_device_create(const char *, const char *); void iio_sw_device_destroy(struct iio_sw_device *); -int iio_sw_device_type_configfs_register(struct iio_sw_device_type *dt); -void iio_sw_device_type_configfs_unregister(struct iio_sw_device_type *dt); - static inline void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h index 47de2443e984..bc77f88df303 100644 --- a/include/linux/iio/sw_trigger.h +++ b/include/linux/iio/sw_trigger.h @@ -51,9 +51,6 @@ void iio_unregister_sw_trigger_type(struct iio_sw_trigger_type *tt); struct iio_sw_trigger *iio_sw_trigger_create(const char *, const char *); void iio_sw_trigger_destroy(struct iio_sw_trigger *); -int iio_sw_trigger_type_configfs_register(struct iio_sw_trigger_type *tt); -void iio_sw_trigger_type_configfs_unregister(struct iio_sw_trigger_type *tt); - static inline void 
iio_swt_group_init_type_name(struct iio_sw_trigger *t, const char *name, -- cgit v1.2.3 From 373beef00f7d781a000b12c31fb17a5a9c25969c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 11 Sep 2023 15:52:57 +0100 Subject: KVM: arm64: nvhe: Ignore SVE hint in SMCCC function ID When SVE is enabled, the host may set bit 16 in SMCCC function IDs, a hint that indicates an unused SVE state. At the moment NVHE doesn't account for this bit when inspecting the function ID, and rejects most calls. Clear the hint bit before comparing function IDs. About version compatibility: the host's PSCI driver initially probes the firmware for a SMCCC version number. If the firmware implements a protocol recent enough (1.3), subsequent SMCCC calls have the hint bit set. Since the hint bit was reserved in earlier versions of the protocol, clearing it is fine regardless of the version in use. When a new hint is added to the protocol in the future, it will be added to ARM_SMCCC_CALL_HINTS and NVHE will handle it straight away. This patch only clears known hints and leaves reserved bits as is, because future SMCCC versions could use reserved bits as modifiers for the function ID, rather than hints. 
Fixes: cfa7ff959a78 ("arm64: smccc: Support SMCCC v1.3 SVE register saving hint") Reported-by: Ben Horgan Signed-off-by: Jean-Philippe Brucker Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230911145254.934414-4-jean-philippe@linaro.org --- arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kvm/hyp/include/nvhe/ffa.h | 2 +- arch/arm64/kvm/hyp/nvhe/ffa.c | 3 +-- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 8 ++++++-- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 +-- include/linux/arm-smccc.h | 2 ++ 7 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index b7238c72a04c..66efd67ea7e8 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -118,7 +118,7 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu); u64 __guest_enter(struct kvm_vcpu *vcpu); -bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); #ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, diff --git a/arch/arm64/kvm/hyp/include/nvhe/ffa.h b/arch/arm64/kvm/hyp/include/nvhe/ffa.h index 1becb10ecd80..d9fd5e6c7d3c 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/ffa.h +++ b/arch/arm64/kvm/hyp/include/nvhe/ffa.h @@ -12,6 +12,6 @@ #define FFA_MAX_FUNC_NUM 0x7F int hyp_ffa_init(void *pages); -bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt); +bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); #endif /* __KVM_HYP_FFA_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index ab4f5d160c58..6e4dba9eadef 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -634,9 +634,8 @@ out_handled: return true; } -bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt) +bool kvm_host_ffa_handler(struct 
kvm_cpu_context *host_ctxt, u32 func_id) { - DECLARE_REG(u64, func_id, host_ctxt, 0); struct arm_smccc_res res; /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 90fade1b032e..1cc06e6797bd 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -57,6 +57,7 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc + bic x0, x0, #ARM_SMCCC_CALL_HINTS mov x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) cmp x0, x3 b.eq 1f diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 857d9bc04fd4..2385fd03ed87 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -368,6 +368,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) if (static_branch_unlikely(&kvm_protected_mode_initialized)) hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize; + id &= ~ARM_SMCCC_CALL_HINTS; id -= KVM_HOST_SMCCC_ID(0); if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall))) @@ -392,11 +393,14 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { + DECLARE_REG(u64, func_id, host_ctxt, 0); bool handled; - handled = kvm_host_psci_handler(host_ctxt); + func_id &= ~ARM_SMCCC_CALL_HINTS; + + handled = kvm_host_psci_handler(host_ctxt, func_id); if (!handled) - handled = kvm_host_ffa_handler(host_ctxt); + handled = kvm_host_ffa_handler(host_ctxt, func_id); if (!handled) default_host_smc_handler(host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 24543d2a3490..d57bcb6ab94d 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -273,9 +273,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ } } -bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) +bool kvm_host_psci_handler(struct kvm_cpu_context 
*host_ctxt, u32 func_id) { - DECLARE_REG(u64, func_id, host_ctxt, 0); unsigned long ret; switch (kvm_host_psci_config.version) { diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 7c67c17321d4..083f85653716 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -67,6 +67,8 @@ #define ARM_SMCCC_VERSION_1_3 0x10003 #define ARM_SMCCC_1_3_SVE_HINT 0x10000 +#define ARM_SMCCC_CALL_HINTS ARM_SMCCC_1_3_SVE_HINT + #define ARM_SMCCC_VERSION_FUNC_ID \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ -- cgit v1.2.3 From 08700ec705043eb0cee01b35cf5b9d63f0230d12 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 6 Sep 2023 03:46:57 +0900 Subject: linux/export: fix reference to exported functions for parisc64 John David Anglin reported parisc has been broken since commit ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost"). Like ia64, parisc64 uses a function descriptor. The function references must be prefixed with P%. Also, symbols prefixed $$ from the library have the symbol type STT_LOPROC instead of STT_FUNC. They should be handled as functions too. 
Fixes: ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost") Reported-by: John David Anglin Tested-by: John David Anglin Tested-by: Helge Deller Closes: https://lore.kernel.org/linux-parisc/1901598a-e11d-f7dd-a5d9-9a69d06e6b6e@bell.net/T/#u Signed-off-by: Masahiro Yamada Signed-off-by: Helge Deller --- include/linux/export-internal.h | 2 ++ scripts/mod/modpost.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index 1c849db953a5..45fca09b2319 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -52,6 +52,8 @@ #ifdef CONFIG_IA64 #define KSYM_FUNC(name) @fptr(name) +#elif defined(CONFIG_PARISC) && defined(CONFIG_64BIT) +#define KSYM_FUNC(name) P%name #else #define KSYM_FUNC(name) name #endif diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index b29b29707f10..ba981f22908a 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1226,6 +1226,15 @@ static void check_export_symbol(struct module *mod, struct elf_info *elf, */ s->is_func = (ELF_ST_TYPE(sym->st_info) == STT_FUNC); + /* + * For parisc64, symbols prefixed $$ from the library have the symbol type + * STT_LOPROC. They should be handled as functions too. + */ + if (elf->hdr->e_ident[EI_CLASS] == ELFCLASS64 && + elf->hdr->e_machine == EM_PARISC && + ELF_ST_TYPE(sym->st_info) == STT_LOPROC) + s->is_func = true; + if (match(secname, PATTERNS(INIT_SECTIONS))) warn("%s: %s: EXPORT_SYMBOL used for init symbol. Remove __init or EXPORT_SYMBOL.\n", mod->name, name); -- cgit v1.2.3 From 25e73b7e3f72a25aa30cbb2eecb49036e0acf066 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Aug 2023 12:55:46 +0200 Subject: x86/ibt: Suppress spurious ENDBR It was reported that under certain circumstances GCC emits ENDBR instructions for _THIS_IP_ usage. Specifically, when it appears at the start of a basic block -- but not elsewhere. 
Since _THIS_IP_ is never used for control flow, these ENDBR instructions are completely superfluous. Override the _THIS_IP_ definition for x86_64 to avoid this. Less ENDBR instructions is better. Fixes: 156ff4a544ae ("x86/ibt: Base IBT bits") Reported-by: David Kaplan Reviewed-by: Andrew Cooper Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230802110323.016197440@infradead.org --- arch/x86/include/asm/linkage.h | 8 ++++++++ include/linux/instruction_pointer.h | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 97a3de7892d3..5ff49fd67732 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -8,6 +8,14 @@ #undef notrace #define notrace __attribute__((no_instrument_function)) +#ifdef CONFIG_64BIT +/* + * The generic version tends to create spurious ENDBR instructions under + * certain conditions. + */ +#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; }) +#endif + #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #endif /* CONFIG_X86_32 */ diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h index cda1f706eaeb..aa0b3ffea935 100644 --- a/include/linux/instruction_pointer.h +++ b/include/linux/instruction_pointer.h @@ -2,7 +2,12 @@ #ifndef _LINUX_INSTRUCTION_POINTER_H #define _LINUX_INSTRUCTION_POINTER_H +#include + #define _RET_IP_ (unsigned long)__builtin_return_address(0) + +#ifndef _THIS_IP_ #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif #endif /* _LINUX_INSTRUCTION_POINTER_H */ -- cgit v1.2.3 From 133c4c0d37175f510a10fa9bed51e223936073fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Sep 2023 17:05:31 +0000 Subject: tcp: defer regular ACK while processing socket backlog This idea came after a particular workload requested the 
quickack attribute set on routes, and a performance drop was noticed for large bulk transfers. For high throughput flows, it is best to use one cpu running the user thread issuing socket system calls, and a separate cpu to process incoming packets from BH context. (With TSO/GRO, bottleneck is usually the 'user' cpu) Problem is the user thread can spend a lot of time while holding the socket lock, forcing BH handler to queue most of incoming packets in the socket backlog. Whenever the user thread releases the socket lock, it must first process all accumulated packets in the backlog, potentially adding latency spikes. Due to flood mitigation, having too many packets in the backlog increases chance of unexpected drops. Backlog processing unfortunately shifts a fair amount of cpu cycles from the BH cpu to the 'user' cpu, thus reducing max throughput. This patch takes advantage of the backlog processing, and the fact that ACK are mostly cumulative. The idea is to detect we are in the backlog processing and defer all eligible ACK into a single one, sent from tcp_release_cb(). This saves cpu cycles on both sides, and network resources. Performance of a single TCP flow on a 200Gbit NIC: - Throughput is increased by 20% (100Gbit -> 120Gbit). - Number of generated ACK per second shrinks from 240,000 to 40,000. - Number of backlog drops per second shrinks from 230 to 0. 
Benchmark context: - Regular netperf TCP_STREAM (no zerocopy) - Intel(R) Xeon(R) Platinum 8481C (Sapphire Rapids) - MAX_SKB_FRAGS = 17 (~60KB per GRO packet) This feature is guarded by a new sysctl, and enabled by default: /proc/sys/net/ipv4/tcp_backlog_ack_defer Signed-off-by: Eric Dumazet Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Dave Taht Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 7 +++++++ include/linux/tcp.h | 14 ++++++++------ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ net/ipv4/tcp_input.c | 8 ++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 5 ++++- 7 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index a66054d0763a..5bfa1837968c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -745,6 +745,13 @@ tcp_comp_sack_nr - INTEGER Default : 44 +tcp_backlog_ack_defer - BOOLEAN + If set, user thread processing socket backlog tries sending + one ACK for the whole queue. This helps to avoid potential + long latencies at end of a TCP socket syscall. + + Default : true + tcp_slow_start_after_idle - BOOLEAN If set, provide RFC2861 behavior and time out the congestion window after an idle period. 
An idle period is defined at diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3c5efeeb024f..44d946161d4a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -463,15 +463,17 @@ enum tsq_enum { TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ + TCP_ACK_DEFERRED, /* TX pure ack is deferred */ }; enum tsq_flags { - TSQF_THROTTLED = (1UL << TSQ_THROTTLED), - TSQF_QUEUED = (1UL << TSQ_QUEUED), - TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), - TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), - TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), - TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), + TSQF_THROTTLED = BIT(TSQ_THROTTLED), + TSQF_QUEUED = BIT(TSQ_QUEUED), + TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED), + TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7a41c4791536..d96d05b08819 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,7 @@ struct netns_ipv4 { u8 sysctl_tcp_syncookies; u8 sysctl_tcp_migrate_req; u8 sysctl_tcp_comp_sack_nr; + u8 sysctl_tcp_backlog_ack_defer; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6ac890b4073f..e7f024d93572 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1366,6 +1366,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, }, + { + .procname = "tcp_backlog_ack_defer", + .data = &init_net.ipv4.sysctl_tcp_backlog_ack_defer, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = 
proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, { .procname = "tcp_reflect_tos", .data = &init_net.ipv4.sysctl_tcp_reflect_tos, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 06fe1cf645d5..41b471748437 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5553,6 +5553,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) { + /* If we are running from __release_sock() in user context, + * Defer the ack until tcp_release_cb(). + */ + if (sock_owned_by_user_nocheck(sk) && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) { + set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags); + return; + } send_now: tcp_send_ack(sk); return; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 27140e5cdc06..f13eb7e23d03 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3263,6 +3263,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC; net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; + net->ipv4.sysctl_tcp_backlog_ack_defer = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4cac12d0e63..1fc1f879cfd6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1077,7 +1077,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t) #define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ TCPF_WRITE_TIMER_DEFERRED | \ TCPF_DELACK_TIMER_DEFERRED | \ - TCPF_MTU_REDUCED_DEFERRED) + TCPF_MTU_REDUCED_DEFERRED | \ + TCPF_ACK_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -1114,6 +1115,8 @@ void tcp_release_cb(struct sock *sk) 
inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } + if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) + tcp_send_ack(sk); } EXPORT_SYMBOL(tcp_release_cb); -- cgit v1.2.3 From 2b5dcb31a19a2e0acd869b12c9db9b2d696ef544 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 12 Sep 2023 23:04:41 +0800 Subject: bpf, x64: Fix tailcall infinite loop From commit ebf7d1f508a73871 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT"), the tailcall on x64 works better than before. From commit e411901c0b775a3a ("bpf: allow for tailcalls in BPF subprograms for x64 JIT"), a tailcall is able to run in BPF subprograms on x64. From commit 5b92a28aae4dd0f8 ("bpf: Support attaching tracing BPF program to other BPF programs"), a BPF program is able to trace other BPF programs. How about combining them all together? 1. FENTRY/FEXIT on a BPF subprogram. 2. A tailcall runs in the BPF subprogram. 3. The tailcall calls the subprogram's caller. As a result, a tailcall infinite loop comes up. And the loop would halt the machine. As we know, in tail call context, the tail_call_cnt is propagated via the stack and the rax register between BPF subprograms. The same must be done in trampolines.
Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") Fixes: e411901c0b77 ("bpf: allow for tailcalls in BPF subprograms for x64 JIT") Reviewed-by: Maciej Fijalkowski Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20230912150442.2009-3-hffilwlqm@gmail.com Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 28 ++++++++++++++++++++++------ include/linux/bpf.h | 5 +++++ kernel/bpf/trampoline.c | 4 ++-- kernel/bpf/verifier.c | 3 +++ 4 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index bcca1c9b9a02..2846c21d75bf 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1022,6 +1022,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op) #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) +/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ +#define RESTORE_TAIL_CALL_CNT(stack) \ + EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8) + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { @@ -1627,9 +1631,7 @@ st: if (is_imm8(insn->off)) func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { - /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ - EMIT3_off32(0x48, 0x8B, 0x85, - -round_up(bpf_prog->aux->stack_depth, 8) - 8); + RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth); if (!imm32) return -EINVAL; offs = 7 + x86_call_depth_emit_accounting(&prog, func); @@ -2404,6 +2406,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * [ ... 
] * [ stack_arg2 ] * RBP - arg_stack_off [ stack_arg1 ] + * RSP [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX */ /* room for return value of orig_call or fentry prog */ @@ -2468,6 +2471,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i else /* sub rsp, stack_size */ EMIT4(0x48, 0x83, 0xEC, stack_size); + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + EMIT1(0x50); /* push rax */ /* mov QWORD PTR [rbp - rbx_off], rbx */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off); @@ -2520,9 +2525,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i restore_regs(m, &prog, regs_off); save_args(m, &prog, arg_stack_off, true); + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + /* Before calling the original function, restore the + * tail_call_cnt from stack to rax. + */ + RESTORE_TAIL_CALL_CNT(stack_size); + if (flags & BPF_TRAMP_F_ORIG_STACK) { - emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); - EMIT2(0xff, 0xd0); /* call *rax */ + emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8); + EMIT2(0xff, 0xd3); /* call *rbx */ } else { /* call original function */ if (emit_rsb_call(&prog, orig_call, prog)) { @@ -2573,7 +2584,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ret = -EINVAL; goto cleanup; } - } + } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + /* Before running the original function, restore the + * tail_call_cnt from stack to rax. + */ + RESTORE_TAIL_CALL_CNT(stack_size); + /* restore return value of orig_call or fentry prog back into RAX */ if (save_ret) emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 87eeb3a46a1d..b9e573159432 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1035,6 +1035,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) +/* Indicate that current trampoline is in a tail call context. 
Then, it has to + * cache and restore tail_call_cnt to avoid infinite tail call loop. + */ +#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 53ff50cac61e..e97aeda3a86b 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -415,8 +415,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut goto out; } - /* clear all bits except SHARE_IPMODIFY */ - tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY; + /* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */ + tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX); if (tlinks[BPF_TRAMP_FEXIT].nr_links || tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dbba2b806017..18e673c0ac15 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19774,6 +19774,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) if (!tr) return -ENOMEM; + if (tgt_prog && tgt_prog->aux->tail_call_reachable) + tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX; + prog->aux->dst_trampoline = tr; return 0; } -- cgit v1.2.3 From 8f012db27c9516be1a7aca93ea4a6ca9c75056c9 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 10 Jul 2023 13:02:58 -0700 Subject: x86/numa: Introduce numa_fill_memblks() numa_fill_memblks() fills in the gaps in numa_meminfo memblks over a physical address range. The ACPI driver will use numa_fill_memblks() to implement a new Linux policy that prescribes extending proximity domains in a portion of a CFMWS window to the entire window. Dan Williams offered this explanation of the policy: A CFMWS is an ACPI data structure that indicates *potential* locations where CXL memory can be placed. It is the playground where the CXL driver has free rein to establish regions.
That space can be populated by BIOS created regions, or driver created regions, after hotplug or other reconfiguration. When BIOS creates a region in a CXL Window it additionally describes that subset of the Window range in the other typical ACPI tables SRAT, SLIT, and HMAT. The rationale for BIOS not pre-describing the entire CXL Window in SRAT, SLIT, and HMAT is that it can not predict the future. I.e. there is nothing stopping higher or lower performance devices being placed in the same Window. Compare that to ACPI memory hotplug that just onlines additional capacity in the proximity domain with little freedom for dynamic performance differentiation. That leaves the OS with a choice, should unpopulated window capacity match the proximity domain of an existing region, or should it allocate a new one? This patch takes the simple position of minimizing proximity domain proliferation by reusing any proximity domain intersection for the entire Window. If the Window has no intersections then allocate a new proximity domain. Note that SRAT, SLIT and HMAT information can be enumerated dynamically in a standard way from device provided data. Think of CXL as the end of ACPI needing to describe memory attributes, CXL offers a standard discovery model for performance attributes, but Linux still needs to interoperate with the old regime. 
Reported-by: Derick Marks Suggested-by: Dan Williams Signed-off-by: Alison Schofield Signed-off-by: Dave Hansen Reviewed-by: Dan Williams Tested-by: Derick Marks Link: https://lore.kernel.org/all/ef078a6f056ca974e5af85997013c0fda9e3326d.1689018477.git.alison.schofield%40intel.com --- arch/x86/include/asm/sparsemem.h | 2 + arch/x86/mm/numa.c | 80 ++++++++++++++++++++++++++++++++++++++++ include/linux/numa.h | 7 ++++ 3 files changed, 89 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 64df897c0ee3..1be13b2dfe8b 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -37,6 +37,8 @@ extern int phys_to_target_node(phys_addr_t start); #define phys_to_target_node phys_to_target_node extern int memory_add_physaddr_to_nid(u64 start); #define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +extern int numa_fill_memblks(u64 start, u64 end); +#define numa_fill_memblks numa_fill_memblks #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 2aadb2019b4f..c01c5506fd4a 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -961,4 +962,83 @@ int memory_add_physaddr_to_nid(u64 start) return nid; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); + +static int __init cmp_memblk(const void *a, const void *b) +{ + const struct numa_memblk *ma = *(const struct numa_memblk **)a; + const struct numa_memblk *mb = *(const struct numa_memblk **)b; + + return ma->start - mb->start; +} + +static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; + +/** + * numa_fill_memblks - Fill gaps in numa_meminfo memblks + * @start: address to begin fill + * @end: address to end fill + * + * Find and extend numa_meminfo memblks to cover the @start-@end + * physical address range, such that the first memblk includes + * @start, the last memblk includes @end, and 
any gaps in between + * are filled. + * + * RETURNS: + * 0 : Success + * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + */ + +int __init numa_fill_memblks(u64 start, u64 end) +{ + struct numa_memblk **blk = &numa_memblk_list[0]; + struct numa_meminfo *mi = &numa_meminfo; + int count = 0; + u64 prev_end; + + /* + * Create a list of pointers to numa_meminfo memblks that + * overlap start, end. Exclude (start == bi->end) since + * end addresses in both a CFMWS range and a memblk range + * are exclusive. + * + * This list of pointers is used to make in-place changes + * that fill out the numa_meminfo memblks. + */ + for (int i = 0; i < mi->nr_blks; i++) { + struct numa_memblk *bi = &mi->blk[i]; + + if (start < bi->end && end >= bi->start) { + blk[count] = &mi->blk[i]; + count++; + } + } + if (!count) + return NUMA_NO_MEMBLK; + + /* Sort the list of pointers in memblk->start order */ + sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL); + + /* Make sure the first/last memblks include start/end */ + blk[0]->start = min(blk[0]->start, start); + blk[count - 1]->end = max(blk[count - 1]->end, end); + + /* + * Fill any gaps by tracking the previous memblks + * end address and backfilling to it if needed. 
+ */ + prev_end = blk[0]->end; + for (int i = 1; i < count; i++) { + struct numa_memblk *curr = blk[i]; + + if (prev_end >= curr->start) { + if (prev_end < curr->end) + prev_end = curr->end; + } else { + curr->start = prev_end; + prev_end = curr->end; + } + } + return 0; +} + #endif diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051f..0f512c0aba54 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -12,6 +12,7 @@ #define MAX_NUMNODES (1 << NODES_SHIFT) #define NUMA_NO_NODE (-1) +#define NUMA_NO_MEMBLK (-1) /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO @@ -43,6 +44,12 @@ static inline int phys_to_target_node(u64 start) return 0; } #endif +#ifndef numa_fill_memblks +static inline int __init numa_fill_memblks(u64 start, u64 end) +{ + return NUMA_NO_MEMBLK; +} +#endif #else /* !CONFIG_NUMA */ static inline int numa_map_to_online_node(int node) { -- cgit v1.2.3 From 5eb1e6e459cfa025f79c43014f66ff62a55542f1 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 5 Sep 2023 21:42:53 +0200 Subject: i2c: Drop legacy callback .probe_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all drivers are converted to the (new) .probe() callback, the temporary .probe_new() can go away. 
\o/ Link: https://lore.kernel.org/linux-i2c/20230626094548.559542-1-u.kleine-koenig@pengutronix.de Reviewed-by: Javier Martinez Canillas Reviewed-by: Jean Delvare Signed-off-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 3430cc2b05a6..0dae9db27538 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -237,7 +237,6 @@ enum i2c_driver_flags { * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) * @probe: Callback for device binding - * @probe_new: Transitional callback for device binding - do not use * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol @@ -272,16 +271,8 @@ enum i2c_driver_flags { struct i2c_driver { unsigned int class; - union { /* Standard driver model interfaces */ - int (*probe)(struct i2c_client *client); - /* - * Legacy callback that was part of a conversion of .probe(). - * Today it has the same semantic as .probe(). Don't use for new - * code. - */ - int (*probe_new)(struct i2c_client *client); - }; + int (*probe)(struct i2c_client *client); void (*remove)(struct i2c_client *client); -- cgit v1.2.3 From bbaa6ffa5b6c9609d3b3c431c389b407eea5441f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 13 Sep 2023 11:32:33 +0800 Subject: power: supply: core: Use blocking_notifier_call_chain to avoid RCU complaint AMD PMF driver can cause the following warning: [ 196.159546] ------------[ cut here ]------------ [ 196.159556] Voluntary context switch within RCU read-side critical section! 
[ 196.159571] WARNING: CPU: 0 PID: 9 at kernel/rcu/tree_plugin.h:320 rcu_note_context_switch+0x43d/0x560 [ 196.159604] Modules linked in: nvme_fabrics ccm rfcomm snd_hda_scodec_cs35l41_spi cmac algif_hash algif_skcipher af_alg bnep joydev btusb btrtl uvcvideo btintel btbcm videobuf2_vmalloc intel_rapl_msr btmtk videobuf2_memops uvc videobuf2_v4l2 intel_rapl_common binfmt_misc hid_sensor_als snd_sof_amd_vangogh hid_sensor_trigger bluetooth industrialio_triggered_buffer videodev snd_sof_amd_rembrandt hid_sensor_iio_common amdgpu ecdh_generic kfifo_buf videobuf2_common hp_wmi kvm_amd sparse_keymap snd_sof_amd_renoir wmi_bmof industrialio ecc mc nls_iso8859_1 kvm snd_sof_amd_acp irqbypass snd_sof_xtensa_dsp crct10dif_pclmul crc32_pclmul mt7921e snd_sof_pci snd_ctl_led polyval_clmulni mt7921_common polyval_generic snd_sof ghash_clmulni_intel mt792x_lib mt76_connac_lib sha512_ssse3 snd_sof_utils aesni_intel snd_hda_codec_realtek crypto_simd mt76 snd_hda_codec_generic cryptd snd_soc_core snd_hda_codec_hdmi rapl ledtrig_audio input_leds snd_compress i2c_algo_bit drm_ttm_helper mac80211 snd_pci_ps hid_multitouch ttm drm_exec [ 196.159970] drm_suballoc_helper snd_rpl_pci_acp6x amdxcp drm_buddy snd_hda_intel snd_acp_pci snd_hda_scodec_cs35l41_i2c serio_raw gpu_sched snd_hda_scodec_cs35l41 snd_acp_legacy_common snd_intel_dspcfg snd_hda_cs_dsp_ctls snd_hda_codec libarc4 drm_display_helper snd_pci_acp6x cs_dsp snd_hwdep snd_soc_cs35l41_lib video k10temp snd_pci_acp5x thunderbolt snd_hda_core drm_kms_helper cfg80211 snd_seq snd_rn_pci_acp3x snd_pcm snd_acp_config cec snd_soc_acpi snd_seq_device rc_core ccp snd_pci_acp3x snd_timer snd soundcore wmi amd_pmf platform_profile amd_pmc mac_hid serial_multi_instantiate wireless_hotkey hid_sensor_hub sch_fq_codel msr parport_pc ppdev lp parport efi_pstore ip_tables x_tables autofs4 btrfs blake2b_generic raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx libcrc32c xor raid6_pq raid1 raid0 multipath linear dm_mirror 
dm_region_hash dm_log cdc_ether usbnet r8152 mii hid_generic nvme i2c_hid_acpi i2c_hid nvme_core i2c_piix4 xhci_pci amd_sfh drm xhci_pci_renesas nvme_common hid [ 196.160382] CPU: 0 PID: 9 Comm: kworker/0:1 Not tainted 6.6.0-rc1 #4 [ 196.160397] Hardware name: HP HP EliteBook 845 14 inch G10 Notebook PC/8B6E, BIOS V82 Ver. 01.02.00 08/24/2023 [ 196.160405] Workqueue: events power_supply_changed_work [ 196.160426] RIP: 0010:rcu_note_context_switch+0x43d/0x560 [ 196.160440] Code: 00 48 89 be 40 08 00 00 48 89 86 48 08 00 00 48 89 10 e9 63 fe ff ff 48 c7 c7 10 e7 b0 9e c6 05 e8 d8 20 02 01 e8 13 0f f3 ff <0f> 0b e9 27 fc ff ff a9 ff ff ff 7f 0f 84 cf fc ff ff 65 48 8b 3c [ 196.160450] RSP: 0018:ffffc900001878f0 EFLAGS: 00010046 [ 196.160462] RAX: 0000000000000000 RBX: ffff88885e834040 RCX: 0000000000000000 [ 196.160470] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 196.160476] RBP: ffffc90000187910 R08: 0000000000000000 R09: 0000000000000000 [ 196.160482] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 196.160488] R13: 0000000000000000 R14: ffff888100990000 R15: ffff888100990000 [ 196.160495] FS: 0000000000000000(0000) GS:ffff88885e800000(0000) knlGS:0000000000000000 [ 196.160504] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 196.160512] CR2: 000055cb053c8246 CR3: 000000013443a000 CR4: 0000000000750ef0 [ 196.160520] PKRU: 55555554 [ 196.160526] Call Trace: [ 196.160532] [ 196.160548] ? show_regs+0x72/0x90 [ 196.160570] ? rcu_note_context_switch+0x43d/0x560 [ 196.160580] ? __warn+0x8d/0x160 [ 196.160600] ? rcu_note_context_switch+0x43d/0x560 [ 196.160613] ? report_bug+0x1bb/0x1d0 [ 196.160637] ? handle_bug+0x46/0x90 [ 196.160658] ? exc_invalid_op+0x19/0x80 [ 196.160675] ? asm_exc_invalid_op+0x1b/0x20 [ 196.160709] ? rcu_note_context_switch+0x43d/0x560 [ 196.160727] __schedule+0xb9/0x15f0 [ 196.160746] ? srso_alias_return_thunk+0x5/0x7f [ 196.160765] ? srso_alias_return_thunk+0x5/0x7f [ 196.160778] ? 
acpi_ns_search_one_scope+0xbe/0x270 [ 196.160806] schedule+0x68/0x110 [ 196.160820] schedule_timeout+0x151/0x160 [ 196.160829] ? srso_alias_return_thunk+0x5/0x7f [ 196.160842] ? srso_alias_return_thunk+0x5/0x7f [ 196.160855] ? acpi_ns_lookup+0x3c5/0xa90 [ 196.160878] __down_common+0xff/0x220 [ 196.160905] __down_timeout+0x16/0x30 [ 196.160920] down_timeout+0x64/0x70 [ 196.160938] acpi_os_wait_semaphore+0x85/0x200 [ 196.160959] acpi_ut_acquire_mutex+0x9e/0x280 [ 196.160979] acpi_ex_enter_interpreter+0x2d/0xb0 [ 196.160992] acpi_ns_evaluate+0x2f0/0x5f0 [ 196.161005] acpi_evaluate_object+0x172/0x490 [ 196.161018] ? acpi_os_signal_semaphore+0x8a/0xd0 [ 196.161038] acpi_evaluate_integer+0x52/0xe0 [ 196.161055] ? kfree+0x79/0x120 [ 196.161071] ? srso_alias_return_thunk+0x5/0x7f [ 196.161089] acpi_ac_get_state.part.0+0x27/0x80 [ 196.161110] get_ac_property+0x5c/0x70 [ 196.161127] ? __pfx___power_supply_is_system_supplied+0x10/0x10 [ 196.161146] __power_supply_is_system_supplied+0x44/0xb0 [ 196.161166] class_for_each_device+0x124/0x160 [ 196.161184] ? acpi_ac_get_state.part.0+0x27/0x80 [ 196.161203] ? srso_alias_return_thunk+0x5/0x7f [ 196.161223] power_supply_is_system_supplied+0x3c/0x70 [ 196.161243] amd_pmf_get_power_source+0xe/0x20 [amd_pmf] [ 196.161276] amd_pmf_power_slider_update_event+0x49/0x90 [amd_pmf] [ 196.161310] amd_pmf_pwr_src_notify_call+0xe7/0x100 [amd_pmf] [ 196.161340] notifier_call_chain+0x5f/0xe0 [ 196.161362] atomic_notifier_call_chain+0x33/0x60 [ 196.161378] power_supply_changed_work+0x84/0x110 [ 196.161394] process_one_work+0x178/0x360 [ 196.161412] ? __pfx_worker_thread+0x10/0x10 [ 196.161424] worker_thread+0x307/0x430 [ 196.161440] ? __pfx_worker_thread+0x10/0x10 [ 196.161451] kthread+0xf4/0x130 [ 196.161467] ? __pfx_kthread+0x10/0x10 [ 196.161486] ret_from_fork+0x43/0x70 [ 196.161502] ? 
__pfx_kthread+0x10/0x10 [ 196.161518] ret_from_fork_asm+0x1b/0x30 [ 196.161558] [ 196.161562] ---[ end trace 0000000000000000 ]--- Since there's no guarantee that all the callbacks can work in atomic context, switch to use blocking_notifier_call_chain to relax the constraint. Signed-off-by: Kai-Heng Feng Reported-by: Allen Zhong Fixes: 4c71ae414474 ("platform/x86/amd/pmf: Add support SPS PMF feature") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217571 Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20230913033233.602986-1-kai.heng.feng@canonical.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 8 ++++---- include/linux/power_supply.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 0b69fb7bafd8..416409e2fd6d 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -29,7 +29,7 @@ struct class *power_supply_class; EXPORT_SYMBOL_GPL(power_supply_class); -ATOMIC_NOTIFIER_HEAD(power_supply_notifier); +BLOCKING_NOTIFIER_HEAD(power_supply_notifier); EXPORT_SYMBOL_GPL(power_supply_notifier); static struct device_type power_supply_dev_type; @@ -97,7 +97,7 @@ static void power_supply_changed_work(struct work_struct *work) class_for_each_device(power_supply_class, NULL, psy, __power_supply_changed_work); power_supply_update_leds(psy); - atomic_notifier_call_chain(&power_supply_notifier, + blocking_notifier_call_chain(&power_supply_notifier, PSY_EVENT_PROP_CHANGED, psy); kobject_uevent(&psy->dev.kobj, KOBJ_CHANGE); spin_lock_irqsave(&psy->changed_lock, flags); @@ -1262,13 +1262,13 @@ static void power_supply_dev_release(struct device *dev) int power_supply_reg_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_register(&power_supply_notifier, nb); + return blocking_notifier_chain_register(&power_supply_notifier, nb); } 
EXPORT_SYMBOL_GPL(power_supply_reg_notifier); void power_supply_unreg_notifier(struct notifier_block *nb) { - atomic_notifier_chain_unregister(&power_supply_notifier, nb); + blocking_notifier_chain_unregister(&power_supply_notifier, nb); } EXPORT_SYMBOL_GPL(power_supply_unreg_notifier); diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index a427f13c757f..85b86768c0b9 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -767,7 +767,7 @@ struct power_supply_battery_info { int bti_resistance_tolerance; }; -extern struct atomic_notifier_head power_supply_notifier; +extern struct blocking_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); #if IS_ENABLED(CONFIG_POWER_SUPPLY) -- cgit v1.2.3 From d090ec0df81e56556af3a2bf04a7e89347ae5784 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Thu, 31 Aug 2023 03:31:28 -0300 Subject: smp: Change function signatures to use call_single_data_t call_single_data_t is a size-aligned typedef of struct __call_single_data. This alignment is desirable in order to have smp_call_function*() avoid bouncing an extra cacheline in case of an unaligned csd, given this would hurt performance. Since the removal of struct request->csd in commit 660e802c76c8 ("blk-mq: use percpu csd to remote complete instead of per-rq csd") there are no current users of smp_call_function*() with unaligned csd. Change every 'struct __call_single_data' function parameter to 'call_single_data_t', so we have warnings if any new code tries to introduce an smp_call_function*() call with unaligned csd. 
Signed-off-by: Leonardo Bras Reviewed-by: Guo Ren Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230831063129.335425-1-leobras@redhat.com --- include/linux/smp.h | 2 +- include/trace/events/csd.h | 8 ++++---- kernel/smp.c | 26 +++++++++++++------------- kernel/up.c | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 91ea4a67f8ca..e87520dc2959 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,7 +53,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, void *info, bool wait, const struct cpumask *mask); -int smp_call_function_single_async(int cpu, struct __call_single_data *csd); +int smp_call_function_single_async(int cpu, call_single_data_t *csd); /* * Cpus stopping functions in panic. All have default weak definitions. diff --git a/include/trace/events/csd.h b/include/trace/events/csd.h index 67e9d01f80c2..58cc83b99c34 100644 --- a/include/trace/events/csd.h +++ b/include/trace/events/csd.h @@ -12,7 +12,7 @@ TRACE_EVENT(csd_queue_cpu, TP_PROTO(const unsigned int cpu, unsigned long callsite, smp_call_func_t func, - struct __call_single_data *csd), + call_single_data_t *csd), TP_ARGS(cpu, callsite, func, csd), @@ -39,7 +39,7 @@ TRACE_EVENT(csd_queue_cpu, */ DECLARE_EVENT_CLASS(csd_function, - TP_PROTO(smp_call_func_t func, struct __call_single_data *csd), + TP_PROTO(smp_call_func_t func, call_single_data_t *csd), TP_ARGS(func, csd), @@ -57,12 +57,12 @@ DECLARE_EVENT_CLASS(csd_function, ); DEFINE_EVENT(csd_function, csd_function_entry, - TP_PROTO(smp_call_func_t func, struct __call_single_data *csd), + TP_PROTO(smp_call_func_t func, call_single_data_t *csd), TP_ARGS(func, csd) ); DEFINE_EVENT(csd_function, csd_function_exit, - TP_PROTO(smp_call_func_t func, struct __call_single_data *csd), + 
TP_PROTO(smp_call_func_t func, call_single_data_t *csd), TP_ARGS(func, csd) ); diff --git a/kernel/smp.c b/kernel/smp.c index 385179dae360..822fabb7e3e1 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -125,7 +125,7 @@ send_call_function_ipi_mask(struct cpumask *mask) } static __always_inline void -csd_do_func(smp_call_func_t func, void *info, struct __call_single_data *csd) +csd_do_func(smp_call_func_t func, void *info, call_single_data_t *csd) { trace_csd_function_entry(func, csd); func(info); @@ -172,7 +172,7 @@ module_param(csd_lock_timeout, ulong, 0444); static atomic_t csd_bug_count = ATOMIC_INIT(0); /* Record current CSD work for current CPU, NULL to erase. */ -static void __csd_lock_record(struct __call_single_data *csd) +static void __csd_lock_record(call_single_data_t *csd) { if (!csd) { smp_mb(); /* NULL cur_csd after unlock. */ @@ -187,13 +187,13 @@ static void __csd_lock_record(struct __call_single_data *csd) /* Or before unlock, as the case may be. */ } -static __always_inline void csd_lock_record(struct __call_single_data *csd) +static __always_inline void csd_lock_record(call_single_data_t *csd) { if (static_branch_unlikely(&csdlock_debug_enabled)) __csd_lock_record(csd); } -static int csd_lock_wait_getcpu(struct __call_single_data *csd) +static int csd_lock_wait_getcpu(call_single_data_t *csd) { unsigned int csd_type; @@ -208,7 +208,7 @@ static int csd_lock_wait_getcpu(struct __call_single_data *csd) * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU, * so waiting on other types gets much less information. */ -static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *ts1, int *bug_id) +static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) { int cpu = -1; int cpux; @@ -272,7 +272,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * * previous function call. 
For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ -static void __csd_lock_wait(struct __call_single_data *csd) +static void __csd_lock_wait(call_single_data_t *csd) { int bug_id = 0; u64 ts0, ts1; @@ -286,7 +286,7 @@ static void __csd_lock_wait(struct __call_single_data *csd) smp_acquire__after_ctrl_dep(); } -static __always_inline void csd_lock_wait(struct __call_single_data *csd) +static __always_inline void csd_lock_wait(call_single_data_t *csd) { if (static_branch_unlikely(&csdlock_debug_enabled)) { __csd_lock_wait(csd); @@ -296,17 +296,17 @@ static __always_inline void csd_lock_wait(struct __call_single_data *csd) smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); } #else -static void csd_lock_record(struct __call_single_data *csd) +static void csd_lock_record(call_single_data_t *csd) { } -static __always_inline void csd_lock_wait(struct __call_single_data *csd) +static __always_inline void csd_lock_wait(call_single_data_t *csd) { smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); } #endif -static __always_inline void csd_lock(struct __call_single_data *csd) +static __always_inline void csd_lock(call_single_data_t *csd) { csd_lock_wait(csd); csd->node.u_flags |= CSD_FLAG_LOCK; @@ -319,7 +319,7 @@ static __always_inline void csd_lock(struct __call_single_data *csd) smp_wmb(); } -static __always_inline void csd_unlock(struct __call_single_data *csd) +static __always_inline void csd_unlock(call_single_data_t *csd) { WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK)); @@ -372,7 +372,7 @@ void __smp_call_single_queue(int cpu, struct llist_node *node) * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. 
*/ -static int generic_exec_single(int cpu, struct __call_single_data *csd) +static int generic_exec_single(int cpu, call_single_data_t *csd) { if (cpu == smp_processor_id()) { smp_call_func_t func = csd->func; @@ -658,7 +658,7 @@ EXPORT_SYMBOL(smp_call_function_single); * * Return: %0 on success or negative errno value on error */ -int smp_call_function_single_async(int cpu, struct __call_single_data *csd) +int smp_call_function_single_async(int cpu, call_single_data_t *csd) { int err = 0; diff --git a/kernel/up.c b/kernel/up.c index a38b8b095251..df50828cc2f0 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -25,7 +25,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -int smp_call_function_single_async(int cpu, struct __call_single_data *csd) +int smp_call_function_single_async(int cpu, call_single_data_t *csd) { unsigned long flags; -- cgit v1.2.3 From 85be6d842447067ce76047a14d4258c96fd33b7b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Aug 2023 12:52:04 +0200 Subject: cleanup: Make no_free_ptr() __must_check recent discussion brought about the realization that it makes sense for no_free_ptr() to have __must_check semantics in order to avoid leaking the resource. Additionally, add a few comments to clarify why/how things work. All credit to Linus on how to combine __must_check and the stmt-expression. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20230816103102.GF980931@hirez.programming.kicks-ass.net --- include/linux/cleanup.h | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 53f1a7a932b0..9f1a9c455b68 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -7,8 +7,9 @@ /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() - * based cleanup function. @free is an expression using '_T' to access - * the variable. + * based cleanup function. @free is an expression using '_T' to access the + * variable. @free should typically include a NULL test before calling a + * function, see the example below. * * __free(name): * variable attribute to add a scoped based cleanup to the variable. @@ -17,6 +18,9 @@ * like a non-atomic xchg(var, NULL), such that the cleanup function will * be inhibited -- provided it sanely deals with a NULL value. * + * NOTE: this has __must_check semantics so that it is harder to accidentally + * leak the resource. + * * return_ptr(p): * returns p while inhibiting the __free(). * @@ -24,6 +28,8 @@ * * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) * + * void *alloc_obj(...) + * { * struct obj *p __free(kfree) = kmalloc(...); * if (!p) * return NULL; @@ -32,6 +38,24 @@ * return NULL; * * return_ptr(p); + * } + * + * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though + * kfree() is fine to be called with a NULL value. This is on purpose. 
This way + * the compiler sees the end of our alloc_obj() function as: + * + * tmp = p; + * p = NULL; + * if (p) + * kfree(p); + * return tmp; + * + * And through the magic of value-propagation and dead-code-elimination, it + * eliminates the actual cleanup call and compiles into: + * + * return p; + * + * Without the NULL test it turns into a mess and the compiler can't help us. */ #define DEFINE_FREE(_name, _type, _free) \ @@ -39,8 +63,17 @@ #define __free(_name) __cleanup(__free_##_name) +#define __get_and_null_ptr(p) \ + ({ __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = NULL; __val; }) + +static inline __must_check +const volatile void * __must_check_fn(const volatile void *val) +{ return val; } + #define no_free_ptr(p) \ - ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + ((typeof(p)) __must_check_fn(__get_and_null_ptr(p))) #define return_ptr(p) return no_free_ptr(p) -- cgit v1.2.3 From 6fb45460615358157a6d3c990e74f9c1395247e2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Jun 2023 20:45:16 +0200 Subject: sched: Simplify tg_set_cfs_bandwidth() Use guards to reduce gotos and simplify control flow. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 2 ++ kernel/sched/core.c | 38 +++++++++++++++++++------------------- 2 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 0abd60a7987b..f19f56501809 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -153,6 +153,8 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ +DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) + #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a3f4fb8a6841..5d9f36359461 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10802,11 +10802,12 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota, * Prevent race between setting of cfs_rq->runtime_enabled and * unthrottle_offline_cfs_rqs(). 
*/ - cpus_read_lock(); - mutex_lock(&cfs_constraints_mutex); + guard(cpus_read_lock)(); + guard(mutex)(&cfs_constraints_mutex); + ret = __cfs_schedulable(tg, period, quota); if (ret) - goto out_unlock; + return ret; runtime_enabled = quota != RUNTIME_INF; runtime_was_enabled = cfs_b->quota != RUNTIME_INF; @@ -10816,39 +10817,38 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota, */ if (runtime_enabled && !runtime_was_enabled) cfs_bandwidth_usage_inc(); - raw_spin_lock_irq(&cfs_b->lock); - cfs_b->period = ns_to_ktime(period); - cfs_b->quota = quota; - cfs_b->burst = burst; - __refill_cfs_bandwidth_runtime(cfs_b); + scoped_guard (raw_spinlock_irq, &cfs_b->lock) { + cfs_b->period = ns_to_ktime(period); + cfs_b->quota = quota; + cfs_b->burst = burst; - /* Restart the period timer (if active) to handle new period expiry: */ - if (runtime_enabled) - start_cfs_bandwidth(cfs_b); + __refill_cfs_bandwidth_runtime(cfs_b); - raw_spin_unlock_irq(&cfs_b->lock); + /* + * Restart the period timer (if active) to handle new + * period expiry: + */ + if (runtime_enabled) + start_cfs_bandwidth(cfs_b); + } for_each_online_cpu(i) { struct cfs_rq *cfs_rq = tg->cfs_rq[i]; struct rq *rq = cfs_rq->rq; - struct rq_flags rf; - rq_lock_irq(rq, &rf); + guard(rq_lock_irq)(rq); cfs_rq->runtime_enabled = runtime_enabled; cfs_rq->runtime_remaining = 0; if (cfs_rq->throttled) unthrottle_cfs_rq(cfs_rq); - rq_unlock_irq(rq, &rf); } + if (runtime_was_enabled && !runtime_enabled) cfs_bandwidth_usage_dec(); -out_unlock: - mutex_unlock(&cfs_constraints_mutex); - cpus_read_unlock(); - return ret; + return 0; } static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) -- cgit v1.2.3 From 89de9921dfa77e43b985bde99a6031ab66511020 Mon Sep 17 00:00:00 2001 From: Paul M Stillwell Jr Date: Wed, 6 Sep 2023 13:57:01 -0600 Subject: virtchnl: Add CRC stripping capability Some VFs may want to disable CRC stripping on incoming packets so create an offload for that. 
The VF already sends information about configuring its RX queues so use that structure to indicate that the CRC stripping should be enabled or not. Signed-off-by: Paul M Stillwell Jr Reviewed-by: Jesse Brandeburg Reviewed-by: Paul Menzel Signed-off-by: Ahmed Zaki Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index d0807ad43f93..dd71d3009771 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -240,6 +240,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6) /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) +#define VIRTCHNL_VF_OFFLOAD_CRC BIT(10) #define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) @@ -295,7 +296,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. - * PF configures requested queue and returns a status code. + * PF configures requested queue and returns a status code. The + * crc_disable flag disables CRC stripping on the VF. Setting + * the crc_disable flag to 1 will disable CRC stripping for each + * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC + * offload must have been set prior to sending this info or the PF + * will ignore the request. This flag should be set the same for + * all of the queues for a VF. 
*/ /* Rx queue config info */ @@ -307,7 +314,7 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; - u8 pad0; + u8 crc_disable; u8 rxdid; u8 pad1[2]; u64 dma_ring_addr; -- cgit v1.2.3 From b193a78ddb5ee7dba074d3f28dc050069ba083c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Sep 2023 12:34:40 -0400 Subject: NFS: Use the correct commit info in nfs_join_page_group() Ensure that nfs_clear_request_commit() updates the correct counters when it removes them from the commit list. Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 8 +++++--- fs/nfs/write.c | 23 ++++++++++++----------- include/linux/nfs_page.h | 4 +++- 3 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index a53e50123499..3391c8b97da5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -498,7 +498,9 @@ static void nfs_direct_add_page_head(struct list_head *list, kref_get(&head->wb_kref); } -static void nfs_direct_join_group(struct list_head *list, struct inode *inode) +static void nfs_direct_join_group(struct list_head *list, + struct nfs_commit_info *cinfo, + struct inode *inode) { struct nfs_page *req, *subreq; @@ -520,7 +522,7 @@ static void nfs_direct_join_group(struct list_head *list, struct inode *inode) nfs_release_request(subreq); } } while ((subreq = subreq->wb_this_page) != req); - nfs_join_page_group(req, inode); + nfs_join_page_group(req, cinfo, inode); } } @@ -545,7 +547,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); - nfs_direct_join_group(&reqs, dreq->inode); + nfs_direct_join_group(&reqs, &cinfo, dreq->inode); nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); get_dreq(dreq); diff --git 
a/fs/nfs/write.c b/fs/nfs/write.c index f4cca8f00c0c..8c1ee1a1a28f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -59,7 +59,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static const struct nfs_rw_ops nfs_rw_write_ops; static void nfs_inode_remove_request(struct nfs_page *req); -static void nfs_clear_request_commit(struct nfs_page *req); +static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, + struct nfs_page *req); static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); static struct nfs_page * @@ -502,8 +503,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, * the (former) group. All subrequests are removed from any write or commit * lists, unlinked from the group and destroyed. */ -void -nfs_join_page_group(struct nfs_page *head, struct inode *inode) +void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, + struct inode *inode) { struct nfs_page *subreq; struct nfs_page *destroy_list = NULL; @@ -533,7 +534,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode) * Commit list removal accounting is done after locks are dropped */ subreq = head; do { - nfs_clear_request_commit(subreq); + nfs_clear_request_commit(cinfo, subreq); subreq = subreq->wb_this_page; } while (subreq != head); @@ -566,8 +567,10 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { struct inode *inode = folio_file_mapping(folio)->host; struct nfs_page *head; + struct nfs_commit_info cinfo; int ret; + nfs_init_cinfo_from_inode(&cinfo, inode); /* * A reference is taken only on the head request which acts as a * reference to the whole page group - the group will not be destroyed @@ -584,7 +587,7 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) return ERR_PTR(ret); } - nfs_join_page_group(head, inode); + nfs_join_page_group(head, 
&cinfo, inode); return head; } @@ -955,18 +958,16 @@ static void nfs_folio_clear_commit(struct folio *folio) } /* Called holding the request lock on @req */ -static void -nfs_clear_request_commit(struct nfs_page *req) +static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, + struct nfs_page *req) { if (test_bit(PG_CLEAN, &req->wb_flags)) { struct nfs_open_context *ctx = nfs_req_openctx(req); struct inode *inode = d_inode(ctx->dentry); - struct nfs_commit_info cinfo; - nfs_init_cinfo_from_inode(&cinfo, inode); mutex_lock(&NFS_I(inode)->commit_mutex); - if (!pnfs_clear_request_commit(req, &cinfo)) { - nfs_request_remove_commit_list(req, &cinfo); + if (!pnfs_clear_request_commit(req, cinfo)) { + nfs_request_remove_commit_list(req, cinfo); } mutex_unlock(&NFS_I(inode)->commit_mutex); nfs_folio_clear_commit(nfs_page_to_folio(req)); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index aa9f4c6ebe26..1c315f854ea8 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -157,7 +157,9 @@ extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); -extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); +extern void nfs_join_page_group(struct nfs_page *head, + struct nfs_commit_info *cinfo, + struct inode *inode); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3 From 806a3bc421a115fbb287c1efce63a48c54ee804b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 30 Aug 2023 15:29:34 -0400 Subject: NFSv4.1: fix pnfs MDS=DS session trunking Currently, when GETDEVICEINFO returns multiple locations where each is a different IP but the server's identity is same as MDS, then 
nfs4_set_ds_client() finds the existing nfs_client structure which has the MDS's max_connect value (and if it's 1), then the 1st IP on the DS's list will get dropped due to MDS trunking rules. Other IPs would be added as they fall under the pnfs trunking rules. For the list of IPs the 1st goes thru calling nfs4_set_ds_client() which will eventually call nfs4_add_trunk() and call into rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. The other IPs (after the 1st one), would call rpc_clnt_add_xprt() which doesn't go thru that check. nfs4_add_trunk() is called when MDS trunking is happening and it needs to enforce the usage of max_connect mount option of the 1st mount. However, this shouldn't be applied to pnfs flow. Instead, this patch proposes to treat MDS=DS as DS trunking and make sure that MDS's max_connect limit does not apply to the 1st IP returned in the GETDEVICEINFO list. It does so by marking the newly created client with a new flag NFS_CS_PNFS which is then used to pass the max_connect value to use into rpc_clnt_test_and_add_xprt() instead of the existing rpc client's max_connect value set by the MDS connection. For example, mount was done without max_connect value set so MDS's rpc client has cl_max_connect=1. Upon calling into rpc_clnt_test_and_add_xprt(), rather than using the rpc client's value, the caller passes in the max_connect value which has previously been set in the pnfs path (as a part of handling GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). However, when NFS_CS_PNFS flag is not set and we know we are doing MDS trunking, comparing a new IP of the same server, we then set the max_connect value to the existing MDS's value and pass that into rpc_clnt_test_and_add_xprt(). 
Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 6 +++++- include/linux/nfs_fs_sb.h | 1 + net/sunrpc/clnt.c | 11 +++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 27fb25567ce7..11e3a285594c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -417,6 +417,8 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old) .net = old->cl_net, .servername = old->cl_hostname, }; + int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ? + clp->cl_max_connect : old->cl_max_connect; if (clp->cl_proto != old->cl_proto) return; @@ -430,7 +432,7 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old) xprt_args.addrlen = clp_salen; rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args, - rpc_clnt_test_and_add_xprt, NULL); + rpc_clnt_test_and_add_xprt, &max_connect); } /** @@ -1010,6 +1012,8 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); __set_bit(NFS_CS_DS, &cl_init.init_flags); + __set_bit(NFS_CS_PNFS, &cl_init.init_flags); + cl_init.max_connect = NFS_MAX_TRANSPORTS; /* * Set an authflavor equual to the MDS value. 
Use the MDS nfs_client * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 20eeba8b009d..cd628c4b011e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -48,6 +48,7 @@ struct nfs_client { #define NFS_CS_NOPING 6 /* - don't ping on connect */ #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ +#define NFS_CS_PNFS 9 /* - Server used for pnfs */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index edbcfdd84e1f..37b0b212b934 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2908,19 +2908,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = { * @clnt: pointer to struct rpc_clnt * @xps: pointer to struct rpc_xprt_switch, * @xprt: pointer struct rpc_xprt - * @dummy: unused + * @in_max_connect: pointer to the max_connect value for the passed in xprt transport */ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, - void *dummy) + void *in_max_connect) { struct rpc_cb_add_xprt_calldata *data; struct rpc_task *task; + int max_connect = clnt->cl_max_connect; - if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) { + if (in_max_connect) + max_connect = *(int *)in_max_connect; + if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) { rcu_read_lock(); pr_warn("SUNRPC: reached max allowed number (%d) did not add " - "transport to server: %s\n", clnt->cl_max_connect, + "transport to server: %s\n", max_connect, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); rcu_read_unlock(); return -EINVAL; -- cgit v1.2.3 From e4c89f9380017b6b2e63836e2de1af8eb4535384 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 27 Aug 2023 23:14:04 +0200 Subject: lib/ucs2_string: Add UCS-2 strscpy function Add a ucs2_strscpy() 
function for UCS-2 strings. The behavior is equivalent to the standard strscpy() function, just for 16-bit character UCS-2 strings. Signed-off-by: Maximilian Luz Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230827211408.689076-2-luzmaximilian@gmail.com Signed-off-by: Bjorn Andersson --- include/linux/ucs2_string.h | 1 + lib/ucs2_string.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h index cf3ada3e820e..c499ae809c7d 100644 --- a/include/linux/ucs2_string.h +++ b/include/linux/ucs2_string.h @@ -10,6 +10,7 @@ typedef u16 ucs2_char_t; unsigned long ucs2_strnlen(const ucs2_char_t *s, size_t maxlength); unsigned long ucs2_strlen(const ucs2_char_t *s); unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); +ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count); int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); unsigned long ucs2_utf8size(const ucs2_char_t *src); diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 0a559a42359b..9308bcfb2ad5 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -32,6 +32,58 @@ ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength) } EXPORT_SYMBOL(ucs2_strsize); +/** + * ucs2_strscpy() - Copy a UCS2 string into a sized buffer. + * + * @dst: Pointer to the destination buffer where to copy the string to. + * @src: Pointer to the source buffer where to copy the string from. + * @count: Size of the destination buffer, in UCS2 (16-bit) characters. + * + * Like strscpy(), only for UCS2 strings. + * + * Copy the source string @src, or as much of it as fits, into the destination + * buffer @dst. The behavior is undefined if the string buffers overlap. The + * destination buffer @dst is always NUL-terminated, unless it's zero-sized. 
+ * + * Return: The number of characters copied into @dst (excluding the trailing + * %NUL terminator) or -E2BIG if @count is 0 or @src was truncated due to the + * destination buffer being too small. + */ +ssize_t ucs2_strscpy(ucs2_char_t *dst, const ucs2_char_t *src, size_t count) +{ + long res; + + /* + * Ensure that we have a valid amount of space. We need to store at + * least one NUL-character. + */ + if (count == 0 || WARN_ON_ONCE(count > INT_MAX / sizeof(*dst))) + return -E2BIG; + + /* + * Copy at most 'count' characters, return early if we find a + * NUL-terminator. + */ + for (res = 0; res < count; res++) { + ucs2_char_t c; + + c = src[res]; + dst[res] = c; + + if (!c) + return res; + } + + /* + * The loop above terminated without finding a NUL-terminator, + * exceeding the 'count': Enforce proper NUL-termination and return + * error. + */ + dst[count - 1] = 0; + return -E2BIG; +} +EXPORT_SYMBOL(ucs2_strscpy); + int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len) { -- cgit v1.2.3 From 00b1248606ba3979ccae30ed11df8cdc1a84245a Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 27 Aug 2023 23:14:05 +0200 Subject: firmware: qcom_scm: Add support for Qualcomm Secure Execution Environment SCM interface Add support for SCM calls to Secure OS and the Secure Execution Environment (SEE) residing in the TrustZone (TZ) via the QSEECOM interface. This allows communication with Secure/TZ applications, for example 'uefisecapp' managing access to UEFI variables. For better separation, make qcom_scm spin up a dedicated child (platform) device in case QSEECOM support has been detected. The corresponding driver for this device is then responsible for managing any QSEECOM clients. Specifically, this driver attempts to automatically detect known and supported applications, creating a client (auxiliary) device for each one. The respective client/auxiliary driver is then responsible for managing and communicating with the application. 
While this patch introduces only a very basic interface without the more advanced features (such as re-entrant and blocking SCM calls and listeners/callbacks), this is enough to talk to the aforementioned 'uefisecapp'. Signed-off-by: Maximilian Luz Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20230827211408.689076-3-luzmaximilian@gmail.com Signed-off-by: Bjorn Andersson --- MAINTAINERS | 6 + drivers/firmware/Kconfig | 16 ++ drivers/firmware/Makefile | 1 + drivers/firmware/qcom_qseecom.c | 118 +++++++++ drivers/firmware/qcom_scm.c | 394 +++++++++++++++++++++++++++++ include/linux/firmware/qcom/qcom_qseecom.h | 46 ++++ include/linux/firmware/qcom/qcom_scm.h | 22 ++ 7 files changed, 603 insertions(+) create mode 100644 drivers/firmware/qcom_qseecom.c create mode 100644 include/linux/firmware/qcom/qcom_qseecom.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d297..8f373d8bdd6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17800,6 +17800,12 @@ S: Maintained F: Documentation/devicetree/bindings/mtd/qcom,nandc.yaml F: drivers/mtd/nand/raw/qcom_nandc.c +QUALCOMM QSEECOM DRIVER +M: Maximilian Luz +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: drivers/firmware/qcom_qseecom.c + QUALCOMM RMNET DRIVER M: Subash Abhinov Kasiviswanathan M: Sean Tranchetti diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index b59e3041fd62..3e41efe494d4 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -226,6 +226,22 @@ config QCOM_SCM_DOWNLOAD_MODE_DEFAULT Say Y here to enable "download mode" by default. +config QCOM_QSEECOM + bool "Qualcomm QSEECOM interface driver" + depends on QCOM_SCM=y + help + Various Qualcomm SoCs have a Secure Execution Environment (SEE) running + in the Trust Zone. This module provides an interface to that via the + QSEECOM mechanism, using SCM calls. + + The QSEECOM interface allows, among other things, access to applications + running in the SEE. 
An example of such an application is 'uefisecapp', + which is required to access UEFI variables on certain systems. If + selected, the interface will also attempt to detect and register client + devices for supported applications. + + Select Y here to enable the QSEECOM interface driver. + config SYSFB bool select BOOT_VESA_SUPPORT diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 28fcddcd688f..aa48e0821b7d 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_RASPBERRYPI_FIRMWARE) += raspberrypi.o obj-$(CONFIG_FW_CFG_SYSFS) += qemu_fw_cfg.o obj-$(CONFIG_QCOM_SCM) += qcom-scm.o qcom-scm-objs += qcom_scm.o qcom_scm-smc.o qcom_scm-legacy.o +obj-$(CONFIG_QCOM_QSEECOM) += qcom_qseecom.o obj-$(CONFIG_SYSFB) += sysfb.o obj-$(CONFIG_SYSFB_SIMPLEFB) += sysfb_simplefb.o obj-$(CONFIG_TI_SCI_PROTOCOL) += ti_sci.o diff --git a/drivers/firmware/qcom_qseecom.c b/drivers/firmware/qcom_qseecom.c new file mode 100644 index 000000000000..bba32096c956 --- /dev/null +++ b/drivers/firmware/qcom_qseecom.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for Qualcomm Secure Execution Environment (SEE) interface (QSEECOM). + * Responsible for setting up and managing QSEECOM client devices. 
+ * + * Copyright (C) 2023 Maximilian Luz + */ +#include +#include +#include +#include +#include + +#include +#include + +struct qseecom_app_desc { + const char *app_name; + const char *dev_name; +}; + +static void qseecom_client_release(struct device *dev) +{ + struct qseecom_client *client; + + client = container_of(dev, struct qseecom_client, aux_dev.dev); + kfree(client); +} + +static void qseecom_client_remove(void *data) +{ + struct qseecom_client *client = data; + + auxiliary_device_delete(&client->aux_dev); + auxiliary_device_uninit(&client->aux_dev); +} + +static int qseecom_client_register(struct platform_device *qseecom_dev, + const struct qseecom_app_desc *desc) +{ + struct qseecom_client *client; + u32 app_id; + int ret; + + /* Try to find the app ID, skip device if not found */ + ret = qcom_scm_qseecom_app_get_id(desc->app_name, &app_id); + if (ret) + return ret == -ENOENT ? 0 : ret; + + dev_info(&qseecom_dev->dev, "setting up client for %s\n", desc->app_name); + + /* Allocate and set-up the client device */ + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return -ENOMEM; + + client->aux_dev.name = desc->dev_name; + client->aux_dev.dev.parent = &qseecom_dev->dev; + client->aux_dev.dev.release = qseecom_client_release; + client->app_id = app_id; + + ret = auxiliary_device_init(&client->aux_dev); + if (ret) { + kfree(client); + return ret; + } + + ret = auxiliary_device_add(&client->aux_dev); + if (ret) { + auxiliary_device_uninit(&client->aux_dev); + return ret; + } + + ret = devm_add_action_or_reset(&qseecom_dev->dev, qseecom_client_remove, client); + if (ret) + return ret; + + return 0; +} + +/* + * List of supported applications. One client device will be created per entry, + * assuming the app has already been loaded (usually by firmware bootloaders) + * and its ID can be queried successfully. 
+ */ +static const struct qseecom_app_desc qcom_qseecom_apps[] = {}; + +static int qcom_qseecom_probe(struct platform_device *qseecom_dev) +{ + int ret; + int i; + + /* Set up client devices for each base application */ + for (i = 0; i < ARRAY_SIZE(qcom_qseecom_apps); i++) { + ret = qseecom_client_register(qseecom_dev, &qcom_qseecom_apps[i]); + if (ret) + return ret; + } + + return 0; +} + +static struct platform_driver qcom_qseecom_driver = { + .driver = { + .name = "qcom_qseecom", + }, + .probe = qcom_qseecom_probe, +}; + +static int __init qcom_qseecom_init(void) +{ + return platform_driver_register(&qcom_qseecom_driver); +} +subsys_initcall(qcom_qseecom_init); + +MODULE_AUTHOR("Maximilian Luz "); +MODULE_DESCRIPTION("Driver for the Qualcomm SEE (QSEECOM) interface"); +MODULE_LICENSE("GPL"); diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index 06fe8aca870d..f9d5c31b8ec7 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -55,6 +55,53 @@ struct qcom_scm_mem_map_info { __le64 mem_size; }; +/** + * struct qcom_scm_qseecom_resp - QSEECOM SCM call response. + * @result: Result or status of the SCM call. See &enum qcom_scm_qseecom_result. + * @resp_type: Type of the response. See &enum qcom_scm_qseecom_resp_type. + * @data: Response data. The type of this data is given in @resp_type. 
+ */ +struct qcom_scm_qseecom_resp { + u64 result; + u64 resp_type; + u64 data; +}; + +enum qcom_scm_qseecom_result { + QSEECOM_RESULT_SUCCESS = 0, + QSEECOM_RESULT_INCOMPLETE = 1, + QSEECOM_RESULT_BLOCKED_ON_LISTENER = 2, + QSEECOM_RESULT_FAILURE = 0xFFFFFFFF, +}; + +enum qcom_scm_qseecom_resp_type { + QSEECOM_SCM_RES_APP_ID = 0xEE01, + QSEECOM_SCM_RES_QSEOS_LISTENER_ID = 0xEE02, +}; + +enum qcom_scm_qseecom_tz_owner { + QSEECOM_TZ_OWNER_SIP = 2, + QSEECOM_TZ_OWNER_TZ_APPS = 48, + QSEECOM_TZ_OWNER_QSEE_OS = 50 +}; + +enum qcom_scm_qseecom_tz_svc { + QSEECOM_TZ_SVC_APP_ID_PLACEHOLDER = 0, + QSEECOM_TZ_SVC_APP_MGR = 1, + QSEECOM_TZ_SVC_INFO = 6, +}; + +enum qcom_scm_qseecom_tz_cmd_app { + QSEECOM_TZ_CMD_APP_SEND = 1, + QSEECOM_TZ_CMD_APP_LOOKUP = 3, +}; + +enum qcom_scm_qseecom_tz_cmd_info { + QSEECOM_TZ_CMD_INFO_VERSION = 3, +}; + +#define QSEECOM_MAX_APP_NAME_SIZE 64 + /* Each bit configures cold/warm boot address for one of the 4 CPUs */ static const u8 qcom_scm_cpu_cold_bits[QCOM_SCM_BOOT_MAX_CPUS] = { 0, BIT(0), BIT(3), BIT(5) @@ -1321,6 +1368,340 @@ static int qcom_scm_find_dload_address(struct device *dev, u64 *addr) return 0; } +#ifdef CONFIG_QCOM_QSEECOM + +/* Lock for QSEECOM SCM call executions */ +static DEFINE_MUTEX(qcom_scm_qseecom_call_lock); + +static int __qcom_scm_qseecom_call(const struct qcom_scm_desc *desc, + struct qcom_scm_qseecom_resp *res) +{ + struct qcom_scm_res scm_res = {}; + int status; + + /* + * QSEECOM SCM calls should not be executed concurrently. Therefore, we + * require the respective call lock to be held. + */ + lockdep_assert_held(&qcom_scm_qseecom_call_lock); + + status = qcom_scm_call(__scm->dev, desc, &scm_res); + + res->result = scm_res.result[0]; + res->resp_type = scm_res.result[1]; + res->data = scm_res.result[2]; + + if (status) + return status; + + return 0; +} + +/** + * qcom_scm_qseecom_call() - Perform a QSEECOM SCM call. + * @desc: SCM call descriptor. + * @res: SCM call response (output). 
+ * + * Performs the QSEECOM SCM call described by @desc, returning the response in + * @res. + * + * Return: Zero on success, nonzero on failure. + */ +static int qcom_scm_qseecom_call(const struct qcom_scm_desc *desc, + struct qcom_scm_qseecom_resp *res) +{ + int status; + + /* + * Note: Multiple QSEECOM SCM calls should not be executed at the same time, + * so lock things here. This needs to be extended to callback/listener + * handling when support for that is implemented. + */ + + mutex_lock(&qcom_scm_qseecom_call_lock); + status = __qcom_scm_qseecom_call(desc, res); + mutex_unlock(&qcom_scm_qseecom_call_lock); + + dev_dbg(__scm->dev, "%s: owner=%x, svc=%x, cmd=%x, result=%lld, type=%llx, data=%llx\n", + __func__, desc->owner, desc->svc, desc->cmd, res->result, + res->resp_type, res->data); + + if (status) { + dev_err(__scm->dev, "qseecom: scm call failed with error %d\n", status); + return status; + } + + /* + * TODO: Handle incomplete and blocked calls: + * + * Incomplete and blocked calls are not supported yet. Some devices + * and/or commands require those, some don't. Let's warn about them + * prominently in case someone attempts to try these commands with a + * device/command combination that isn't supported yet. + */ + WARN_ON(res->result == QSEECOM_RESULT_INCOMPLETE); + WARN_ON(res->result == QSEECOM_RESULT_BLOCKED_ON_LISTENER); + + return 0; +} + +/** + * qcom_scm_qseecom_get_version() - Query the QSEECOM version. + * @version: Pointer where the QSEECOM version will be stored. + * + * Performs the QSEECOM SCM call querying the QSEECOM version currently running in + * the TrustZone. + * + * Return: Zero on success, nonzero on failure.
+ */ +static int qcom_scm_qseecom_get_version(u32 *version) +{ + struct qcom_scm_desc desc = {}; + struct qcom_scm_qseecom_resp res = {}; + u32 feature = 10; + int ret; + + desc.owner = QSEECOM_TZ_OWNER_SIP; + desc.svc = QSEECOM_TZ_SVC_INFO; + desc.cmd = QSEECOM_TZ_CMD_INFO_VERSION; + desc.arginfo = QCOM_SCM_ARGS(1, QCOM_SCM_VAL); + desc.args[0] = feature; + + ret = qcom_scm_qseecom_call(&desc, &res); + if (ret) + return ret; + + *version = res.result; + return 0; +} + +/** + * qcom_scm_qseecom_app_get_id() - Query the app ID for a given QSEE app name. + * @app_name: The name of the app. + * @app_id: The returned app ID. + * + * Query and return the application ID of the SEE app identified by the given + * name. This returned ID is the unique identifier of the app required for + * subsequent communication. + * + * Return: Zero on success, nonzero on failure, -ENOENT if the app has not been + * loaded or could not be found. + */ +int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) +{ + unsigned long name_buf_size = QSEECOM_MAX_APP_NAME_SIZE; + unsigned long app_name_len = strlen(app_name); + struct qcom_scm_desc desc = {}; + struct qcom_scm_qseecom_resp res = {}; + dma_addr_t name_buf_phys; + char *name_buf; + int status; + + if (app_name_len >= name_buf_size) + return -EINVAL; + + name_buf = kzalloc(name_buf_size, GFP_KERNEL); + if (!name_buf) + return -ENOMEM; + + memcpy(name_buf, app_name, app_name_len); + + name_buf_phys = dma_map_single(__scm->dev, name_buf, name_buf_size, DMA_TO_DEVICE); + status = dma_mapping_error(__scm->dev, name_buf_phys); + if (status) { + kfree(name_buf); + dev_err(__scm->dev, "qseecom: failed to map dma address\n"); + return status; + } + + desc.owner = QSEECOM_TZ_OWNER_QSEE_OS; + desc.svc = QSEECOM_TZ_SVC_APP_MGR; + desc.cmd = QSEECOM_TZ_CMD_APP_LOOKUP; + desc.arginfo = QCOM_SCM_ARGS(2, QCOM_SCM_RW, QCOM_SCM_VAL); + desc.args[0] = name_buf_phys; + desc.args[1] = app_name_len; + + status = qcom_scm_qseecom_call(&desc, 
&res); + dma_unmap_single(__scm->dev, name_buf_phys, name_buf_size, DMA_TO_DEVICE); + kfree(name_buf); + + if (status) + return status; + + if (res.result == QSEECOM_RESULT_FAILURE) + return -ENOENT; + + if (res.result != QSEECOM_RESULT_SUCCESS) + return -EINVAL; + + if (res.resp_type != QSEECOM_SCM_RES_APP_ID) + return -EINVAL; + + *app_id = res.data; + return 0; +} +EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_get_id); + +/** + * qcom_scm_qseecom_app_send() - Send to and receive data from a given QSEE app. + * @app_id: The ID of the target app. + * @req: Request buffer sent to the app (must be DMA-mappable). + * @req_size: Size of the request buffer. + * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp_size: Size of the response buffer. + * + * Sends a request to the QSEE app associated with the given ID and reads back + * its response. The caller must provide two DMA memory regions, one for the + * request and one for the response, and fill out the @req region with the + * respective (app-specific) request data. The QSEE app reads this and returns + * its response in the @rsp region. + * + * Return: Zero on success, nonzero on failure.
+ */ +int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, + size_t rsp_size) +{ + struct qcom_scm_qseecom_resp res = {}; + struct qcom_scm_desc desc = {}; + dma_addr_t req_phys; + dma_addr_t rsp_phys; + int status; + + /* Map request buffer */ + req_phys = dma_map_single(__scm->dev, req, req_size, DMA_TO_DEVICE); + status = dma_mapping_error(__scm->dev, req_phys); + if (status) { + dev_err(__scm->dev, "qseecom: failed to map request buffer\n"); + return status; + } + + /* Map response buffer */ + rsp_phys = dma_map_single(__scm->dev, rsp, rsp_size, DMA_FROM_DEVICE); + status = dma_mapping_error(__scm->dev, rsp_phys); + if (status) { + dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE); + dev_err(__scm->dev, "qseecom: failed to map response buffer\n"); + return status; + } + + /* Set up SCM call data */ + desc.owner = QSEECOM_TZ_OWNER_TZ_APPS; + desc.svc = QSEECOM_TZ_SVC_APP_ID_PLACEHOLDER; + desc.cmd = QSEECOM_TZ_CMD_APP_SEND; + desc.arginfo = QCOM_SCM_ARGS(5, QCOM_SCM_VAL, + QCOM_SCM_RW, QCOM_SCM_VAL, + QCOM_SCM_RW, QCOM_SCM_VAL); + desc.args[0] = app_id; + desc.args[1] = req_phys; + desc.args[2] = req_size; + desc.args[3] = rsp_phys; + desc.args[4] = rsp_size; + + /* Perform call */ + status = qcom_scm_qseecom_call(&desc, &res); + + /* Unmap buffers */ + dma_unmap_single(__scm->dev, rsp_phys, rsp_size, DMA_FROM_DEVICE); + dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE); + + if (status) + return status; + + if (res.result != QSEECOM_RESULT_SUCCESS) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_send); + +/* + * We do not yet support re-entrant calls via the qseecom interface. To prevent + * any potential issues with this, only allow validated machines for now.
+ */ +static const struct of_device_id qcom_scm_qseecom_allowlist[] = { + { .compatible = "lenovo,thinkpad-x13s", }, + { } +}; + +static bool qcom_scm_qseecom_machine_is_allowed(void) +{ + struct device_node *np; + bool match; + + np = of_find_node_by_path("/"); + if (!np) + return false; + + match = of_match_node(qcom_scm_qseecom_allowlist, np); + of_node_put(np); + + return match; +} + +static void qcom_scm_qseecom_free(void *data) +{ + struct platform_device *qseecom_dev = data; + + platform_device_del(qseecom_dev); + platform_device_put(qseecom_dev); +} + +static int qcom_scm_qseecom_init(struct qcom_scm *scm) +{ + struct platform_device *qseecom_dev; + u32 version; + int ret; + + /* + * Note: We do two steps of validation here: First, we try to query the + * QSEECOM version as a check to see if the interface exists on this + * device. Second, we check against known good devices due to current + * driver limitations (see comment in qcom_scm_qseecom_allowlist). + * + * Note that we deliberately do the machine check after the version + * check so that we can log potentially supported devices. This should + * be safe as downstream sources indicate that the version query is + * neither blocking nor reentrant. + */ + ret = qcom_scm_qseecom_get_version(&version); + if (ret) + return 0; + + dev_info(scm->dev, "qseecom: found qseecom with version 0x%x\n", version); + + if (!qcom_scm_qseecom_machine_is_allowed()) { + dev_info(scm->dev, "qseecom: untested machine, skipping\n"); + return 0; + } + + /* + * Set up QSEECOM interface device. All application clients will be + * set up and managed by the corresponding driver for it. 
+ */ + qseecom_dev = platform_device_alloc("qcom_qseecom", -1); + if (!qseecom_dev) + return -ENOMEM; + + qseecom_dev->dev.parent = scm->dev; + + ret = platform_device_add(qseecom_dev); + if (ret) { + platform_device_put(qseecom_dev); + return ret; + } + + return devm_add_action_or_reset(scm->dev, qcom_scm_qseecom_free, qseecom_dev); +} + +#else /* CONFIG_QCOM_QSEECOM */ + +static int qcom_scm_qseecom_init(struct qcom_scm *scm) +{ + return 0; +} + +#endif /* CONFIG_QCOM_QSEECOM */ + /** * qcom_scm_is_available() - Checks if SCM is available */ @@ -1468,6 +1849,19 @@ static int qcom_scm_probe(struct platform_device *pdev) if (download_mode) qcom_scm_set_download_mode(true); + /* + * Initialize the QSEECOM interface. + * + * Note: QSEECOM is fairly self-contained and this only adds the + * interface device (the driver of which does most of the heavy + * lifting). So any errors returned here should be either -ENOMEM or + * -EINVAL (with the latter only in case there's a bug in our code). + * This means that there is no need to bring down the whole SCM driver. + * Just log the error instead and let SCM live. + */ + ret = qcom_scm_qseecom_init(scm); + WARN(ret < 0, "failed to initialize qseecom: %d\n", ret); + return 0; } diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h new file mode 100644 index 000000000000..b531547e1dc9 --- /dev/null +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for Qualcomm Secure Execution Environment (SEE) interface (QSEECOM). + * Responsible for setting up and managing QSEECOM client devices. + * + * Copyright (C) 2023 Maximilian Luz + */ +#include +#include + +#include + +/** + * struct qseecom_client - QSEECOM client device. + * @aux_dev: Underlying auxiliary device. + * @app_id: ID of the loaded application. 
+ */ +struct qseecom_client { + struct auxiliary_device aux_dev; + u32 app_id; +}; + +/** + * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. + * @client: The QSEECOM client associated with the target app. + * @req: Request buffer sent to the app (must be DMA-mappable). + * @req_size: Size of the request buffer. + * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp_size: Size of the response buffer. + * + * Sends a request to the QSEE app associated with the given client and reads + * back its response. The caller must provide two DMA memory regions, one for + * the request and one for the response, and fill out the @req region with the + * respective (app-specific) request data. The QSEE app reads this and returns + * its response in the @rsp region. + * + * Note: This is a convenience wrapper around qcom_scm_qseecom_app_send(). + * Clients should prefer to use this wrapper. + * + * Return: Zero on success, nonzero on failure. + */ +static inline int qcom_qseecom_app_send(struct qseecom_client *client, void *req, size_t req_size, + void *rsp, size_t rsp_size) +{ + return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); +} diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 0c091a3f6d49..e1ea2eb56d04 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -122,4 +122,26 @@ extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, extern int qcom_scm_lmh_profile_change(u32 profile_id); extern bool qcom_scm_lmh_dcvsh_available(void); +#ifdef CONFIG_QCOM_QSEECOM + +int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); +int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, + size_t rsp_size); + +#else /* CONFIG_QCOM_QSEECOM */ + +static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) +{ + return -EINVAL; +} + +static
inline int qcom_scm_qseecom_app_send(u32 app_id, void *req, + size_t req_size, void *rsp, + size_t rsp_size) +{ + return -EINVAL; +} + +#endif /* CONFIG_QCOM_QSEECOM */ + #endif -- cgit v1.2.3 From b93c5fe16e4aa177cd072c1c4652cbe1b19a7812 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 3 Aug 2023 21:49:19 +0800 Subject: rcu: Remove unused function declaration rcu_eqs_special_set() Commit a86baa69c2b7 ("rcu: Remove special bit at the bottom of the ->dynticks counter") left behind this, remove it. Signed-off-by: Yue Haibing Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/rcutree.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 126f6b418f6a..153cfc7bbffd 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,7 +37,6 @@ void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); void rcu_barrier(void); -bool rcu_eqs_special_set(int cpu); void rcu_momentary_dyntick_idle(void); void kfree_rcu_scheduler_running(void); bool rcu_gp_might_be_stalled(void); -- cgit v1.2.3 From 6e284c55fc0bef7d25fd34d29db11f483da60ea4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 5 Aug 2023 11:17:25 +0800 Subject: mm: Remove kmem_valid_obj() Function kmem_dump_obj() will splat if passed a pointer to a non-slab object. So nothing calls it directly, instead calling kmem_valid_obj() first to determine whether the passed pointer to a valid slab object. This means that merging kmem_valid_obj() into kmem_dump_obj() will make the code more concise. Therefore, convert kmem_dump_obj() to work the same way as vmalloc_dump_obj(), removing the need for the kmem_dump_obj() caller to check kmem_valid_obj(). After this, there are no remaining calls to kmem_valid_obj() anymore, and it can be safely removed. 
Suggested-by: Matthew Wilcox Signed-off-by: Zhen Lei Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/slab.h | 5 +++-- mm/slab_common.c | 41 +++++++++++------------------------------ mm/util.c | 4 +--- 3 files changed, 15 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 8228d1276a2f..ff56ab804bf6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -245,8 +245,9 @@ DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) size_t ksize(const void *objp); #ifdef CONFIG_PRINTK -bool kmem_valid_obj(void *object); -void kmem_dump_obj(void *object); +bool kmem_dump_obj(void *object); +#else +static inline bool kmem_dump_obj(void *object) { return false; } #endif /* diff --git a/mm/slab_common.c b/mm/slab_common.c index cd71f9581e67..a425bedf2103 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -528,26 +528,6 @@ bool slab_is_available(void) } #ifdef CONFIG_PRINTK -/** - * kmem_valid_obj - does the pointer reference a valid slab object? - * @object: pointer to query. - * - * Return: %true if the pointer is to a not-yet-freed object from - * kmalloc() or kmem_cache_alloc(), either %true or %false if the pointer - * is to an already-freed object, and %false otherwise. - */ -bool kmem_valid_obj(void *object) -{ - struct folio *folio; - - /* Some arches consider ZERO_SIZE_PTR to be a valid address. 
*/ - if (object < (void *)PAGE_SIZE || !virt_addr_valid(object)) - return false; - folio = virt_to_folio(object); - return folio_test_slab(folio); -} -EXPORT_SYMBOL_GPL(kmem_valid_obj); - static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { if (__kfence_obj_info(kpp, object, slab)) @@ -566,11 +546,11 @@ static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab * * and, if available, the slab name, return address, and stack trace from * the allocation and last free path of that object. * - * This function will splat if passed a pointer to a non-slab object. - * If you are not sure what type of object you have, you should instead - * use mem_dump_obj(). + * Return: %true if the pointer is to a not-yet-freed object from + * kmalloc() or kmem_cache_alloc(), either %true or %false if the pointer + * is to an already-freed object, and %false otherwise. */ -void kmem_dump_obj(void *object) +bool kmem_dump_obj(void *object) { char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc"; int i; @@ -578,13 +558,13 @@ void kmem_dump_obj(void *object) unsigned long ptroffset; struct kmem_obj_info kp = { }; - if (WARN_ON_ONCE(!virt_addr_valid(object))) - return; + /* Some arches consider ZERO_SIZE_PTR to be a valid address. 
*/ + if (object < (void *)PAGE_SIZE || !virt_addr_valid(object)) + return false; slab = virt_to_slab(object); - if (WARN_ON_ONCE(!slab)) { - pr_cont(" non-slab memory.\n"); - return; - } + if (!slab) + return false; + kmem_obj_info(&kp, object, slab); if (kp.kp_slab_cache) pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name); @@ -621,6 +601,7 @@ void kmem_dump_obj(void *object) pr_info(" %pS\n", kp.kp_free_stack[i]); } + return true; } EXPORT_SYMBOL_GPL(kmem_dump_obj); #endif diff --git a/mm/util.c b/mm/util.c index 8cbbfd3a3d59..6eddd891198e 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1060,10 +1060,8 @@ void mem_dump_obj(void *object) { const char *type; - if (kmem_valid_obj(object)) { - kmem_dump_obj(object); + if (kmem_dump_obj(object)) return; - } if (vmalloc_dump_obj(object)) return; -- cgit v1.2.3 From 25cc71d1527b55c880d333e7cc1dc37aeef9843f Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 11:44:41 +0500 Subject: lsm: constify 'sb' parameter in security_quotactl() SELinux registers the implementation for the "quotactl" hook. Looking at the function implementation we observe that the parameter "sb" is not changing. Mark the "sb" parameter of LSM hook security_quotactl() as "const" since it will not be changing in the LSM hook. 
Signed-off-by: Khadija Kamran Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- security/security.c | 2 +- security/selinux/hooks.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ac962c4cb44b..b464f9c1894f 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -43,7 +43,7 @@ LSM_HOOK(int, 0, capset, struct cred *new, const struct cred *old, const kernel_cap_t *permitted) LSM_HOOK(int, 0, capable, const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts) -LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, struct super_block *sb) +LSM_HOOK(int, 0, quotactl, int cmds, int type, int id, const struct super_block *sb) LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, diff --git a/include/linux/security.h b/include/linux/security.h index 5f16eecde00b..1a02e67e682f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -284,7 +284,7 @@ int security_capable(const struct cred *cred, struct user_namespace *ns, int cap, unsigned int opts); -int security_quotactl(int cmds, int type, int id, struct super_block *sb); +int security_quotactl(int cmds, int type, int id, const struct super_block *sb); int security_quota_on(struct dentry *dentry); int security_syslog(int type); int security_settime64(const struct timespec64 *ts, const struct timezone *tz); @@ -581,7 +581,7 @@ static inline int security_capable(const struct cred *cred, } static inline int security_quotactl(int cmds, int type, int id, - struct super_block *sb) + const struct super_block *sb) { return 0; } diff --git a/security/security.c b/security/security.c index 23b129d482a7..b944b19e4512 100644 --- a/security/security.c +++ b/security/security.c @@ -957,7 +957,7 @@ int 
security_capable(const struct cred *cred, * * Return: Returns 0 if permission is granted. */ -int security_quotactl(int cmds, int type, int id, struct super_block *sb) +int security_quotactl(int cmds, int type, int id, const struct super_block *sb) { return call_int_hook(quotactl, 0, cmds, type, id, sb); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 10350534de6d..e1c7640a5df1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1937,7 +1937,7 @@ static inline int may_rename(struct inode *old_dir, /* Check whether a task can perform a filesystem operation. */ static int superblock_has_perm(const struct cred *cred, - struct super_block *sb, + const struct super_block *sb, u32 perms, struct common_audit_data *ad) { @@ -2139,7 +2139,7 @@ static int selinux_capable(const struct cred *cred, struct user_namespace *ns, return cred_has_capability(cred, cap, opts, ns == &init_user_ns); } -static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb) +static int selinux_quotactl(int cmds, int type, int id, const struct super_block *sb) { const struct cred *cred = current_cred(); int rc = 0; -- cgit v1.2.3 From 4a00c673068e72c12d243f5c31000246d6984e44 Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 12:17:29 +0500 Subject: lsm: constify 'file' parameter in security_bprm_creds_from_file() The 'bprm_creds_from_file' hook has implementation registered in commoncap. Looking at the function implementation we observe that the 'file' parameter is not changing. Mark the 'file' parameter of LSM hook security_bprm_creds_from_file() as 'const' since it will not be changing in the LSM hook. 
Signed-off-by: Khadija Kamran Signed-off-by: Paul Moore --- include/linux/fs.h | 2 +- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 6 +++--- security/commoncap.c | 4 ++-- security/security.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4aeb3fa11927..f69d085e531f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2450,7 +2450,7 @@ struct filename { }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); -static inline struct mnt_idmap *file_mnt_idmap(struct file *file) +static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { return mnt_idmap(file->f_path.mnt); } diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index b464f9c1894f..5dfe67d69aba 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -50,7 +50,7 @@ LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) -LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *file) +LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) diff --git a/include/linux/security.h b/include/linux/security.h index 1a02e67e682f..edbea3e0a13f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -151,7 +151,7 @@ extern int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +extern int 
cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int cap_inode_removexattr(struct mnt_idmap *idmap, @@ -290,7 +290,7 @@ int security_syslog(int type); int security_settime64(const struct timespec64 *ts, const struct timezone *tz); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_bprm_creds_for_exec(struct linux_binprm *bprm); -int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file); +int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int security_bprm_check(struct linux_binprm *bprm); void security_bprm_committing_creds(struct linux_binprm *bprm); void security_bprm_committed_creds(struct linux_binprm *bprm); @@ -613,7 +613,7 @@ static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) } static inline int security_bprm_creds_from_file(struct linux_binprm *bprm, - struct file *file) + const struct file *file) { return cap_bprm_creds_from_file(bprm, file); } diff --git a/security/commoncap.c b/security/commoncap.c index bc0521104197..8e8c630ce204 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -720,7 +720,7 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, * its xattrs and, if present, apply them to the proposed credentials being * constructed by execve(). */ -static int get_file_caps(struct linux_binprm *bprm, struct file *file, +static int get_file_caps(struct linux_binprm *bprm, const struct file *file, bool *effective, bool *has_fcap) { int rc = 0; @@ -882,7 +882,7 @@ static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old, * * Return: 0 if successful, -ve on error. 
*/ -int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) +int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file) { /* Process setpcap binaries and capabilities for uid 0 */ const struct cred *old = current_cred(); diff --git a/security/security.c b/security/security.c index b944b19e4512..bde8813e89ff 100644 --- a/security/security.c +++ b/security/security.c @@ -1079,7 +1079,7 @@ int security_bprm_creds_for_exec(struct linux_binprm *bprm) * * Return: Returns 0 if the hook is successful and permission is granted. */ -int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) +int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file) { return call_int_hook(bprm_creds_from_file, 0, bprm, file); } -- cgit v1.2.3 From 64fc9526147c7fc14535134d8ea79b9c8dc549a7 Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 12:47:56 +0500 Subject: lsm: constify 'bprm' parameter in security_bprm_committing_creds() The 'bprm_committing_creds' hook has implementations registered in SELinux and Apparmor. Looking at the function implementations we observe that the 'bprm' parameter is not changing. Mark the 'bprm' parameter of LSM hook security_bprm_committing_creds() as 'const' since it will not be changing in the LSM hook. 
Signed-off-by: Khadija Kamran Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- security/apparmor/lsm.c | 2 +- security/security.c | 2 +- security/selinux/hooks.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 5dfe67d69aba..f6acc3ed66a3 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -52,7 +52,7 @@ LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, const struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, diff --git a/include/linux/security.h b/include/linux/security.h index edbea3e0a13f..885053f81019 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -292,7 +292,7 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_bprm_creds_for_exec(struct linux_binprm *bprm); int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int security_bprm_check(struct linux_binprm *bprm); -void security_bprm_committing_creds(struct linux_binprm *bprm); +void security_bprm_committing_creds(const struct linux_binprm *bprm); void security_bprm_committed_creds(struct linux_binprm *bprm); int security_fs_context_submount(struct fs_context *fc, struct super_block *reference); int security_fs_context_dup(struct fs_context *fc, struct fs_context 
*src_fc); @@ -623,7 +623,7 @@ static inline int security_bprm_check(struct linux_binprm *bprm) return 0; } -static inline void security_bprm_committing_creds(struct linux_binprm *bprm) +static inline void security_bprm_committing_creds(const struct linux_binprm *bprm) { } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 108eccc5ada5..b03f46e0f6c5 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -734,7 +734,7 @@ fail: * apparmor_bprm_committing_creds - do task cleanup on committing new creds * @bprm: binprm for the exec (NOT NULL) */ -static void apparmor_bprm_committing_creds(struct linux_binprm *bprm) +static void apparmor_bprm_committing_creds(const struct linux_binprm *bprm) { struct aa_label *label = aa_current_raw_label(); struct aa_label *new_label = cred_label(bprm->cred); diff --git a/security/security.c b/security/security.c index bde8813e89ff..77a1601ead36 100644 --- a/security/security.c +++ b/security/security.c @@ -1118,7 +1118,7 @@ int security_bprm_check(struct linux_binprm *bprm) * open file descriptors to which access will no longer be granted when the * attributes are changed. This is called immediately before commit_creds(). 
*/ -void security_bprm_committing_creds(struct linux_binprm *bprm) +void security_bprm_committing_creds(const struct linux_binprm *bprm) { call_void_hook(bprm_committing_creds, bprm); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e1c7640a5df1..f42a1b78bc43 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2455,7 +2455,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, /* * Prepare a process for imminent new credential changes due to exec */ -static void selinux_bprm_committing_creds(struct linux_binprm *bprm) +static void selinux_bprm_committing_creds(const struct linux_binprm *bprm) { struct task_security_struct *new_tsec; struct rlimit *rlim, *initrlim; -- cgit v1.2.3 From 81b36803ac139827538ac5ce4028e750a3c53f53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:21 +0000 Subject: udp: introduce udp->udp_flags According to syzbot, it is time to use proper atomic flags for various UDP flags. Add udp_flags field, and convert udp->corkflag to first bit in it. 
Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 28 +++++++++++++++++++++------- net/ipv4/udp.c | 12 ++++++------ net/ipv6/udp.c | 6 +++--- 3 files changed, 30 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 43c1fb2d2c21..23f0693e0d9c 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -32,14 +32,20 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) return (num + net_hash_mix(net)) & mask; } +enum { + UDP_FLAGS_CORK, /* Cork is required */ +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node + + unsigned long udp_flags; + int pending; /* Any pending frames ? */ - unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ @@ -51,6 +57,11 @@ struct udp_sock { gro_enabled:1, /* Request GRO aggregation */ accept_udp_l4:1, accept_udp_fraglist:1; +/* indicator bits used by pcflag: */ +#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ +#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ +#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ + __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. 
@@ -62,12 +73,6 @@ struct udp_sock { */ __u16 pcslen; __u16 pcrlen; -/* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ - __u8 unused[3]; /* * For encapsulation sockets. */ @@ -95,6 +100,15 @@ struct udp_sock { int forward_threshold; }; +#define udp_test_bit(nr, sk) \ + test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_set_bit(nr, sk) \ + set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_clear_bit(nr, sk) \ + clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_assign_bit(nr, sk, val) \ + assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) + #define UDP_MAX_SEGMENTS (1 << 6UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f39b9c844580..9709f8a532dc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1051,7 +1051,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) u8 tos, scope; __be16 dport; int err, is_udplite = IS_UDPLITE(sk); - int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; struct ip_options_data opt_copy; @@ -1315,11 +1315,11 @@ void udp_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if (!up->pending || READ_ONCE(up->corkflag)) + if (!up->pending || udp_test_bit(CORK, sk)) return; lock_sock(sk); - if (up->pending && !READ_ONCE(up->corkflag)) + if (up->pending && !udp_test_bit(CORK, sk)) udp_push_pending_frames(sk); release_sock(sk); } @@ -2658,9 +2658,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: if (val != 0) { - 
WRITE_ONCE(up->corkflag, 1); + udp_set_bit(CORK, sk); } else { - WRITE_ONCE(up->corkflag, 0); + udp_clear_bit(CORK, sk); lock_sock(sk); push_pending_frames(sk); release_sock(sk); @@ -2783,7 +2783,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: - val = READ_ONCE(up->corkflag); + val = udp_test_bit(CORK, sk); break; case UDP_ENCAP: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 86b5d509a468..0c6973cd22ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1332,7 +1332,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int addr_len = msg->msg_namelen; bool connected = false; int ulen = len; - int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int err; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); @@ -1644,11 +1644,11 @@ static void udpv6_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if (!up->pending || READ_ONCE(up->corkflag)) + if (!up->pending || udp_test_bit(CORK, sk)) return; lock_sock(sk); - if (up->pending && !READ_ONCE(up->corkflag)) + if (up->pending && !udp_test_bit(CORK, sk)) udp_v6_push_pending_frames(sk); release_sock(sk); } -- cgit v1.2.3 From a0002127cd746fcaa182ad3386ef6931c37f3bda Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:22 +0000 Subject: udp: move udp->no_check6_tx to udp->udp_flags syzbot reported that udp->no_check6_tx can be read locklessly. 
Use one atomic bit from udp->udp_flags Fixes: 1c19448c9ba6 ("net: Make enabling of zero UDP6 csums more restrictive") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 10 +++++----- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 23f0693e0d9c..e3f2a6c7ac1d 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -34,6 +34,7 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) enum { UDP_FLAGS_CORK, /* Cork is required */ + UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ }; struct udp_sock { @@ -47,8 +48,7 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ + unsigned char no_check6_rx:1,/* Allow zero UDP6 checksums on RX? 
*/ encap_enabled:1, /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set @@ -115,7 +115,7 @@ struct udp_sock { static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_tx = val; + udp_assign_bit(NO_CHECK6_TX, sk, val); } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) @@ -123,9 +123,9 @@ static inline void udp_set_no_check6_rx(struct sock *sk, bool val) udp_sk(sk)->no_check6_rx = val; } -static inline bool udp_get_no_check6_tx(struct sock *sk) +static inline bool udp_get_no_check6_tx(const struct sock *sk) { - return udp_sk(sk)->no_check6_tx; + return udp_test_bit(NO_CHECK6_TX, sk); } static inline bool udp_get_no_check6_rx(struct sock *sk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9709f8a532dc..0c6998291c99 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2694,7 +2694,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_TX: - up->no_check6_tx = valbool; + udp_set_no_check6_tx(sk, valbool); break; case UDP_NO_CHECK6_RX: @@ -2791,7 +2791,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_TX: - val = up->no_check6_tx; + val = udp_get_no_check6_tx(sk); break; case UDP_NO_CHECK6_RX: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0c6973cd22ce..469df0ca561f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1241,7 +1241,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (udp_sk(sk)->no_check6_tx) { + if (udp_get_no_check6_tx(sk)) { kfree_skb(skb); return -EINVAL; } @@ -1262,7 +1262,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, if (is_udplite) csum = udplite_csum(skb); - else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ + else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* 
UDP hardware csum */ -- cgit v1.2.3 From bcbc1b1de884647aa0318bf74eb7f293d72a1e40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:23 +0000 Subject: udp: move udp->no_check6_rx to udp->udp_flags syzbot reported that udp->no_check6_rx can be read locklessly. Use one atomic bit from udp->udp_flags. Fixes: 1c19448c9ba6 ("net: Make enabling of zero UDP6 csums more restrictive") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 10 +++++----- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index e3f2a6c7ac1d..8d4c3835b1b2 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -35,6 +35,7 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) enum { UDP_FLAGS_CORK, /* Cork is required */ UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ + UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ }; struct udp_sock { @@ -48,8 +49,7 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_rx:1,/* Allow zero UDP6 checksums on RX? 
*/ - encap_enabled:1, /* This socket enabled encap + unsigned char encap_enabled:1, /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set * this @@ -120,7 +120,7 @@ static inline void udp_set_no_check6_tx(struct sock *sk, bool val) static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_rx = val; + udp_assign_bit(NO_CHECK6_RX, sk, val); } static inline bool udp_get_no_check6_tx(const struct sock *sk) @@ -128,9 +128,9 @@ static inline bool udp_get_no_check6_tx(const struct sock *sk) return udp_test_bit(NO_CHECK6_TX, sk); } -static inline bool udp_get_no_check6_rx(struct sock *sk) +static inline bool udp_get_no_check6_rx(const struct sock *sk) { - return udp_sk(sk)->no_check6_rx; + return udp_test_bit(NO_CHECK6_RX, sk); } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0c6998291c99..cb32826a1db2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2698,7 +2698,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_RX: - up->no_check6_rx = valbool; + udp_set_no_check6_rx(sk, valbool); break; case UDP_SEGMENT: @@ -2795,7 +2795,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_RX: - val = up->no_check6_rx; + val = udp_get_no_check6_rx(sk); break; case UDP_SEGMENT: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 469df0ca561f..6e1ea3029260 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -858,7 +858,7 @@ start_lookup: /* If zero checksum and no_check is not on for * the socket then skip it. 
*/ - if (!uh->check && !udp_sk(sk)->no_check6_rx) + if (!uh->check && !udp_get_no_check6_rx(sk)) continue; if (!first) { first = sk; @@ -980,7 +980,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); - if (!uh->check && !udp_sk(sk)->no_check6_rx) { + if (!uh->check && !udp_get_no_check6_rx(sk)) { if (refcounted) sock_put(sk); goto report_csum_error; @@ -1002,7 +1002,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* Unicast */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { - if (!uh->check && !udp_sk(sk)->no_check6_rx) + if (!uh->check && !udp_get_no_check6_rx(sk)) goto report_csum_error; return udp6_unicast_rcv_skb(sk, skb, uh); } -- cgit v1.2.3 From e1dc0615c6b08ef36414f08c011965b8fb56198b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:24 +0000 Subject: udp: move udp->gro_enabled to udp->udp_flags syzbot reported that udp->gro_enabled can be read locklessly. Use one atomic bit from udp->udp_flags. Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 2 +- net/ipv4/udp.c | 6 +++--- net/ipv4/udp_offload.c | 4 ++-- net/ipv6/udp.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 8d4c3835b1b2..b344bd2e41fc 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -36,6 +36,7 @@ enum { UDP_FLAGS_CORK, /* Cork is required */ UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? 
*/ + UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ }; struct udp_sock { @@ -54,7 +55,6 @@ struct udp_sock { * different encapsulation layer set * this */ - gro_enabled:1, /* Request GRO aggregation */ accept_udp_l4:1, accept_udp_fraglist:1; /* indicator bits used by pcflag: */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cb32826a1db2..1debc10a0f02 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1868,7 +1868,7 @@ try_again: (struct sockaddr *)sin); } - if (udp_sk(sk)->gro_enabled) + if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (inet_cmsg_flags(inet)) @@ -2713,7 +2713,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, /* when enabling GRO, accept the related GSO packet type */ if (valbool) udp_tunnel_encap_enable(sk->sk_socket); - up->gro_enabled = valbool; + udp_assign_bit(GRO_ENABLED, sk, valbool); up->accept_udp_l4 = valbool; release_sock(sk); break; @@ -2803,7 +2803,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_GRO: - val = up->gro_enabled; + val = udp_test_bit(GRO_ENABLED, sk); break; /* The following two cannot be changed on UDP sockets, the return is diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0f46b3c2e4ac..6c95d28d0c4a 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -557,10 +557,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { if (skb->dev->features & NETIF_F_GRO_FRAGLIST) - NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1; + NAPI_GRO_CB(skb)->is_flist = sk ? 
!udp_test_bit(GRO_ENABLED, sk) : 1; if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || - (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) + (sk && udp_test_bit(GRO_ENABLED, sk)) || NAPI_GRO_CB(skb)->is_flist) return call_gro_receive(udp_gro_receive_segment, head, skb); /* no GRO, be sure flush the current packet */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6e1ea3029260..2c3281879b6d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -413,7 +413,7 @@ try_again: (struct sockaddr *)sin6); } - if (udp_sk(sk)->gro_enabled) + if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (np->rxopt.all) -- cgit v1.2.3 From f5f52f0884a595ff99ab1a608643fe4025fca2d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:26 +0000 Subject: udp: move udp->accept_udp_{l4|fraglist} to udp->udp_flags These are read locklessly, move them to udp_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 16 +++++++++------- net/ipv4/udp.c | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index b344bd2e41fc..bb2b87adfbea 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -37,6 +37,8 @@ enum { UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ + UDP_FLAGS_ACCEPT_FRAGLIST, + UDP_FLAGS_ACCEPT_L4, }; struct udp_sock { @@ -50,13 +52,11 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? 
*/ - unsigned char encap_enabled:1, /* This socket enabled encap + unsigned char encap_enabled:1; /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set * this */ - accept_udp_l4:1, - accept_udp_fraglist:1; /* indicator bits used by pcflag: */ #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ @@ -149,10 +149,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) if (!skb_is_gso(skb)) return false; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + !udp_test_bit(ACCEPT_L4, sk)) return true; - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && + !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; return false; @@ -160,8 +162,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) static inline void udp_allow_gso(struct sock *sk) { - udp_sk(sk)->accept_udp_l4 = 1; - udp_sk(sk)->accept_udp_fraglist = 1; + udp_set_bit(ACCEPT_L4, sk); + udp_set_bit(ACCEPT_FRAGLIST, sk); } #define udp_portaddr_for_each_entry(__sk, list) \ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db43907b9a3e..75ba86a87bb6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2716,7 +2716,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, if (valbool) udp_tunnel_encap_enable(sk->sk_socket); udp_assign_bit(GRO_ENABLED, sk, valbool); - up->accept_udp_l4 = valbool; + udp_assign_bit(ACCEPT_L4, sk, valbool); release_sock(sk); break; -- cgit v1.2.3 From ac9a7f4ce5dda1472e8f44096f33066c6ec1a3b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:27 +0000 Subject: udp: lockless UDP_ENCAP_L2TPINUDP / UDP_GRO Move udp->encap_enabled to udp->udp_flags. 
Add udp_test_and_set_bit() helper to allow lockless udp_tunnel_encap_enable() implementation. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 9 ++++----- include/net/udp_tunnel.h | 9 +++------ net/ipv4/udp.c | 10 +++------- net/ipv4/udp_tunnel_core.c | 2 +- net/ipv6/udp.c | 2 +- 5 files changed, 12 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index bb2b87adfbea..0cf83270a4a2 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -39,6 +39,7 @@ enum { UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ UDP_FLAGS_ACCEPT_FRAGLIST, UDP_FLAGS_ACCEPT_L4, + UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ }; struct udp_sock { @@ -52,11 +53,7 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char encap_enabled:1; /* This socket enabled encap - * processing; UDP tunnels and - * different encapsulation layer set - * this - */ + /* indicator bits used by pcflag: */ #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ @@ -104,6 +101,8 @@ struct udp_sock { test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_set_bit(nr, sk) \ set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_test_and_set_bit(nr, sk) \ + test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_clear_bit(nr, sk) \ clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) #define udp_assign_bit(nr, sk, val) \ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 0ca9b7a11baf..29251c3519cf 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -174,16 +174,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) } #endif -static inline void udp_tunnel_encap_enable(struct socket *sock) +static inline void udp_tunnel_encap_enable(struct 
sock *sk) { - struct udp_sock *up = udp_sk(sock->sk); - - if (up->encap_enabled) + if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) return; - up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) - if (sock->sk->sk_family == PF_INET6) + if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif udp_encap_enable(); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 75ba86a87bb6..637a4faf9aff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2618,7 +2618,7 @@ void udp_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) + if (udp_test_bit(ENCAP_ENABLED, sk)) static_branch_dec(&udp_encap_needed_key); } } @@ -2685,9 +2685,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, fallthrough; case UDP_ENCAP_L2TPINUDP: up->encap_type = val; - lock_sock(sk); - udp_tunnel_encap_enable(sk->sk_socket); - release_sock(sk); + udp_tunnel_encap_enable(sk); break; default: err = -ENOPROTOOPT; @@ -2710,14 +2708,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_GRO: - lock_sock(sk); /* when enabling GRO, accept the related GSO packet type */ if (valbool) - udp_tunnel_encap_enable(sk->sk_socket); + udp_tunnel_encap_enable(sk); udp_assign_bit(GRO_ENABLED, sk, valbool); udp_assign_bit(ACCEPT_L4, sk, valbool); - release_sock(sk); break; /* diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 9b18f371af0d..1e7e4aecdc48 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -78,7 +78,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->gro_receive = cfg->gro_receive; udp_sk(sk)->gro_complete = cfg->gro_complete; - udp_tunnel_encap_enable(sock); + udp_tunnel_encap_enable(sk); } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2c3281879b6d..90688877e900 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1670,7 +1670,7 @@ void udpv6_destroy_sock(struct sock *sk) if 
(encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) { + if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udpv6_encap_needed_key); udp_encap_disable(); } -- cgit v1.2.3 From 729549aa350c56a777bb342941ed4d69b6585769 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:29 +0000 Subject: udplite: remove UDPLITE_BIT This flag is set but never read, we can remove it. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 5 ++--- net/ipv4/udplite.c | 1 - net/ipv6/udplite.c | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 0cf83270a4a2..58156edec009 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -55,9 +55,8 @@ struct udp_sock { __u8 encap_type; /* Is this an Encapsulation socket? */ /* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ +#define UDPLITE_SEND_CC 0x1 /* set via udplite setsockopt */ +#define UDPLITE_RECV_CC 0x2 /* set via udplite setsocktopt */ __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ /* * Following member retains the information to create a UDP header diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 39ecdad1b50c..af37af3ab727 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -21,7 +21,6 @@ EXPORT_SYMBOL(udplite_table); static int udplite_sk_init(struct sock *sk) { udp_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 267d491e9707..a60bec9b14f1 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -17,7 +17,6 @@ static int udplitev6_sk_init(struct sock *sk) { 
udpv6_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; -- cgit v1.2.3 From 882af43a0fc37e26d85fb0df0c9edd3bed928de4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 09:17:30 +0000 Subject: udplite: fix various data-races udp->pcflag, udp->pcslen and udp->pcrlen reads/writes are racy. Move udp->pcflag to udp->udp_flags for atomicity, and add READ_ONCE()/WRITE_ONCE() annotations for pcslen and pcrlen. Fixes: ba4e58eca8aa ("[NET]: Supporting UDP-Lite (RFC 3828) in Linux") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/udp.h | 6 ++---- include/net/udplite.h | 14 +++++++++----- net/ipv4/udp.c | 21 +++++++++++---------- net/ipv6/udp.c | 9 +++++---- 4 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 58156edec009..d04188714dca 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -40,6 +40,8 @@ enum { UDP_FLAGS_ACCEPT_FRAGLIST, UDP_FLAGS_ACCEPT_L4, UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ + UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ + UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ }; struct udp_sock { @@ -54,10 +56,6 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ -/* indicator bits used by pcflag: */ -#define UDPLITE_SEND_CC 0x1 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x2 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. 
diff --git a/include/net/udplite.h b/include/net/udplite.h index bd33ff2b8f42..786919d29f8d 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -66,14 +66,18 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { - const struct udp_sock *up = udp_sk(skb->sk); const int off = skb_transport_offset(skb); + const struct sock *sk = skb->sk; int len = skb->len - off; - if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) { - if (0 < up->pcslen) - len = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); + if (udp_test_bit(UDPLITE_SEND_CC, sk)) { + u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen); + + if (pcslen < len) { + if (pcslen > 0) + len = pcslen; + udp_hdr(skb)->len = htons(pcslen); + } } skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2eeab4af17a1..c3ff984b6354 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2120,7 +2120,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { + u16 pcrlen = READ_ONCE(up->pcrlen); /* * MIB statistics other than incrementing the error count are @@ -2133,7 +2134,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) * delivery of packets with coverage values less than a value * provided by the application." 
*/ - if (up->pcrlen == 0) { /* full coverage was set */ + if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; @@ -2144,9 +2145,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) * that it wants x while sender emits packets of smaller size y. * Therefore the above ...()->partial_cov statement is essential. */ - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } @@ -2729,8 +2730,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; - up->pcslen = val; - up->pcflag |= UDPLITE_SEND_CC; + WRITE_ONCE(up->pcslen, val); + udp_set_bit(UDPLITE_SEND_CC, sk); break; /* The receiver specifies a minimum checksum coverage value. To make @@ -2743,8 +2744,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; - up->pcrlen = val; - up->pcflag |= UDPLITE_RECV_CC; + WRITE_ONCE(up->pcrlen, val); + udp_set_bit(UDPLITE_RECV_CC, sk); break; default: @@ -2808,11 +2809,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). 
*/ case UDPLITE_SEND_CSCOV: - val = up->pcslen; + val = READ_ONCE(up->pcslen); break; case UDPLITE_RECV_CSCOV: - val = up->pcrlen; + val = READ_ONCE(up->pcrlen); break; default: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0e79d189613b..f60ba4295435 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -727,16 +727,17 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { + u16 pcrlen = READ_ONCE(up->pcrlen); - if (up->pcrlen == 0) { /* full coverage was set */ + if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } -- cgit v1.2.3 From 2758ac3a11d78af56e6969af04dec611806a62de Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 13 Sep 2023 21:28:25 +0200 Subject: firmware: qcom-scm: drop unneeded 'extern' specifiers The 'extern' specifier in front of a function declaration has no effect. Remove all of them from the qcom-scm header. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Krzysztof Kozlowski Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230913192826.36187-1-bartosz.golaszewski@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_scm.h | 101 +++++++++++++++------------------ 1 file changed, 47 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index e1ea2eb56d04..ccaf28846054 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -59,12 +59,12 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) -extern bool qcom_scm_is_available(void); +bool qcom_scm_is_available(void); -extern int qcom_scm_set_cold_boot_addr(void *entry); -extern int qcom_scm_set_warm_boot_addr(void *entry); -extern void qcom_scm_cpu_power_down(u32 flags); -extern int qcom_scm_set_remote_state(u32 state, u32 id); +int qcom_scm_set_cold_boot_addr(void *entry); +int qcom_scm_set_warm_boot_addr(void *entry); +void qcom_scm_cpu_power_down(u32 flags); +int qcom_scm_set_remote_state(u32 state, u32 id); struct qcom_scm_pas_metadata { void *ptr; @@ -72,55 +72,48 @@ struct qcom_scm_pas_metadata { ssize_t size; }; -extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size, - struct qcom_scm_pas_metadata *ctx); -extern void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); -extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size); -extern int qcom_scm_pas_auth_and_reset(u32 peripheral); -extern int qcom_scm_pas_shutdown(u32 peripheral); -extern bool qcom_scm_pas_supported(u32 peripheral); - -extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); -extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); - -extern bool 
qcom_scm_restore_sec_cfg_available(void); -extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); -extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); -extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); -extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); -extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size); -extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - u64 *src, - const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt); - -extern bool qcom_scm_ocmem_lock_available(void); -extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode); -extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size); - -extern bool qcom_scm_ice_available(void); -extern int qcom_scm_ice_invalidate_key(u32 index); -extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size); - -extern bool qcom_scm_hdcp_available(void); -extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp); - -extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); -extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); - -extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version); -extern int qcom_scm_lmh_profile_change(u32 profile_id); -extern bool qcom_scm_lmh_dcvsh_available(void); +int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size, + struct qcom_scm_pas_metadata *ctx); +void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); +int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size); +int qcom_scm_pas_auth_and_reset(u32 peripheral); +int qcom_scm_pas_shutdown(u32 peripheral); +bool qcom_scm_pas_supported(u32 peripheral); + +int 
qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); +int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); + +bool qcom_scm_restore_sec_cfg_available(void); +int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); +int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); +int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, + u32 cp_nonpixel_start, u32 cp_nonpixel_size); +int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, u64 *src, + const struct qcom_scm_vmperm *newvm, + unsigned int dest_cnt); + +bool qcom_scm_ocmem_lock_available(void); +int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size, + u32 mode); +int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, u32 size); + +bool qcom_scm_ice_available(void); +int qcom_scm_ice_invalidate_key(u32 index); +int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, + enum qcom_scm_ice_cipher cipher, u32 data_unit_size); + +bool qcom_scm_hdcp_available(void); +int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); + +int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); +int qcom_scm_qsmmu500_wait_safe_toggle(bool en); + +int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version); +int qcom_scm_lmh_profile_change(u32 profile_id); +bool qcom_scm_lmh_dcvsh_available(void); #ifdef CONFIG_QCOM_QSEECOM -- cgit v1.2.3 From a721f7b8c3548e943e514a957f2a37f4763b9888 Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 13:16:40 +0500 Subject: lsm: constify 'bprm' parameter in security_bprm_committed_creds() Three LSMs register the implementations for the 'bprm_committed_creds()' hook: AppArmor, SELinux and tomoyo. Looking at the function implementations we may observe that the 'bprm' parameter is not changing. 
Mark the 'bprm' parameter of LSM hook security_bprm_committed_creds() as 'const' since it will not be changing in the LSM hook. Signed-off-by: Khadija Kamran [PM: minor merge fuzzing due to other constification patches] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 4 ++-- security/apparmor/lsm.c | 2 +- security/security.c | 2 +- security/selinux/hooks.c | 2 +- security/tomoyo/tomoyo.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index f6acc3ed66a3..3b0f5cfca464 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -53,7 +53,7 @@ LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, const struct linux_binprm *bprm) -LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) +LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, const struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) diff --git a/include/linux/security.h b/include/linux/security.h index 885053f81019..3148103123fb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -293,7 +293,7 @@ int security_bprm_creds_for_exec(struct linux_binprm *bprm); int security_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file); int security_bprm_check(struct linux_binprm *bprm); void security_bprm_committing_creds(const struct linux_binprm *bprm); -void security_bprm_committed_creds(struct linux_binprm *bprm); +void security_bprm_committed_creds(const struct linux_binprm *bprm); int 
security_fs_context_submount(struct fs_context *fc, struct super_block *reference); int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc); int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param); @@ -627,7 +627,7 @@ static inline void security_bprm_committing_creds(const struct linux_binprm *bpr { } -static inline void security_bprm_committed_creds(struct linux_binprm *bprm) +static inline void security_bprm_committed_creds(const struct linux_binprm *bprm) { } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index b03f46e0f6c5..3fa325d5efac 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -756,7 +756,7 @@ static void apparmor_bprm_committing_creds(const struct linux_binprm *bprm) * apparmor_bprm_committed_creds() - do cleanup after new creds committed * @bprm: binprm for the exec (NOT NULL) */ -static void apparmor_bprm_committed_creds(struct linux_binprm *bprm) +static void apparmor_bprm_committed_creds(const struct linux_binprm *bprm) { /* clear out temporary/transitional state from the context */ aa_clear_task_ctx_trans(task_ctx(current)); diff --git a/security/security.c b/security/security.c index 77a1601ead36..e4aec0f65f75 100644 --- a/security/security.c +++ b/security/security.c @@ -1134,7 +1134,7 @@ void security_bprm_committing_creds(const struct linux_binprm *bprm) * process such as clearing out non-inheritable signal state. This is called * immediately after commit_creds(). 
*/ -void security_bprm_committed_creds(struct linux_binprm *bprm) +void security_bprm_committed_creds(const struct linux_binprm *bprm) { call_void_hook(bprm_committed_creds, bprm); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f42a1b78bc43..e9ee008a9537 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2501,7 +2501,7 @@ static void selinux_bprm_committing_creds(const struct linux_binprm *bprm) * Clean up the process immediately after the installation of new credentials * due to exec */ -static void selinux_bprm_committed_creds(struct linux_binprm *bprm) +static void selinux_bprm_committed_creds(const struct linux_binprm *bprm) { const struct task_security_struct *tsec = selinux_cred(current_cred()); u32 osid, sid; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 25006fddc964..255f1b470295 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -52,7 +52,7 @@ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old, * * @bprm: Pointer to "struct linux_binprm". */ -static void tomoyo_bprm_committed_creds(struct linux_binprm *bprm) +static void tomoyo_bprm_committed_creds(const struct linux_binprm *bprm) { /* Clear old_domain_info saved by execve() request. */ struct tomoyo_task *s = tomoyo_task(current); -- cgit v1.2.3 From 20a2aa47097aae7016209c4dbe392b3b25e0d883 Mon Sep 17 00:00:00 2001 From: Khadija Kamran Date: Wed, 23 Aug 2023 14:01:28 +0500 Subject: lsm: constify 'sb' parameter in security_sb_kern_mount() The "sb_kern_mount" hook has implementation registered in SELinux. Looking at the function implementation we observe that the "sb" parameter is not changing. Mark the "sb" parameter of LSM hook security_sb_kern_mount() as "const" since it will not be changing in the LSM hook. 
Signed-off-by: Khadija Kamran [PM: minor merge fuzzing due to other constification patches] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 2 +- security/security.c | 2 +- security/selinux/hooks.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 3b0f5cfca464..99b8176c3738 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -66,7 +66,7 @@ LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts) LSM_HOOK(int, 0, sb_eat_lsm_opts, char *orig, void **mnt_opts) LSM_HOOK(int, 0, sb_mnt_opts_compat, struct super_block *sb, void *mnt_opts) LSM_HOOK(int, 0, sb_remount, struct super_block *sb, void *mnt_opts) -LSM_HOOK(int, 0, sb_kern_mount, struct super_block *sb) +LSM_HOOK(int, 0, sb_kern_mount, const struct super_block *sb) LSM_HOOK(int, 0, sb_show_options, struct seq_file *m, struct super_block *sb) LSM_HOOK(int, 0, sb_statfs, struct dentry *dentry) LSM_HOOK(int, 0, sb_mount, const char *dev_name, const struct path *path, diff --git a/include/linux/security.h b/include/linux/security.h index 3148103123fb..1d1df326c881 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -304,7 +304,7 @@ void security_free_mnt_opts(void **mnt_opts); int security_sb_eat_lsm_opts(char *options, void **mnt_opts); int security_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts); int security_sb_remount(struct super_block *sb, void *mnt_opts); -int security_sb_kern_mount(struct super_block *sb); +int security_sb_kern_mount(const struct super_block *sb); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(const char *dev_name, const struct path *path, diff --git a/security/security.c b/security/security.c index e4aec0f65f75..7b0052e96806 100644 --- a/security/security.c +++ 
b/security/security.c @@ -1319,7 +1319,7 @@ EXPORT_SYMBOL(security_sb_remount); * * Return: Returns 0 if permission is granted. */ -int security_sb_kern_mount(struct super_block *sb) +int security_sb_kern_mount(const struct super_block *sb) { return call_int_hook(sb_kern_mount, 0, sb); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e9ee008a9537..195db92ac99c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2721,7 +2721,7 @@ out_bad_option: return -EINVAL; } -static int selinux_sb_kern_mount(struct super_block *sb) +static int selinux_sb_kern_mount(const struct super_block *sb) { const struct cred *cred = current_cred(); struct common_audit_data ad; -- cgit v1.2.3 From 4de7b17fd05d03fa919e8c47fc66122bd24d7b6c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 21 Aug 2023 14:44:28 +0100 Subject: sched: Assert for_each_thread() is properly locked list_for_each_entry_rcu() takes an optional fourth argument which allows RCU to assert that the correct lock is held. Several callers of for_each_thread() rely on their caller to be holding the appropriate lock, so this is a useful assertion to include. 
Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Ingo Molnar Reviewed-by: Joel Fernandes (Google) Link: https://lore.kernel.org/r/20230821134428.2504912-1-willy@infradead.org --- include/linux/sched/signal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0014d3adaf84..9610bad018a3 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -656,7 +656,8 @@ extern bool current_is_single_threaded(void); while ((t = next_thread(t)) != g) #define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node, \ + lockdep_is_held(&tasklist_lock)) #define for_each_thread(p, t) \ __for_each_thread((p)->signal, t) -- cgit v1.2.3 From b0adfba7ee770fef20b1b6d86706c28f7fccfb07 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:01:59 +0000 Subject: ipv6: lockless IPV6_UNICAST_HOPS implementation Some np->hop_limit accesses are racy, when socket lock is not held. Add missing annotations and switch to full lockless implementation. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 12 +----------- include/net/ipv6.h | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 20 +++++++++++--------- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 2 +- 6 files changed, 16 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index af8a771a053c..c2e087071384 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -213,17 +213,7 @@ struct ipv6_pinfo { __be32 flow_label; __u32 frag_size; - /* - * Packed in 16bits. - * Omit one shift by putting the signed field at MSB. 
- */ -#if defined(__BIG_ENDIAN_BITFIELD) - __s16 hop_limit:9; - __u16 __unused_1:7; -#else - __u16 __unused_1:7; - __s16 hop_limit:9; -#endif + s16 hop_limit; #if defined(__BIG_ENDIAN_BITFIELD) /* Packed in 16bits. */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c6932d1a3fa8..2e8e7e31e02e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -916,7 +916,7 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, if (ipv6_addr_is_multicast(&fl6->daddr)) hlimit = np->mcast_hops; else - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); return hlimit; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 54fc4c711f2c..1e16d56d8c38 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -309,7 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * Fill in the IPv6 header */ if (np) - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0e2a0847b387..f27993a1470d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -415,6 +415,16 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); + /* Handle options that can be set without locking the socket. 
*/ + switch (optname) { + case IPV6_UNICAST_HOPS: + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->hop_limit, val); + return 0; + } if (needs_rtnl) rtnl_lock(); sockopt_lock_sock(sk); @@ -733,14 +743,6 @@ done: } break; } - case IPV6_UNICAST_HOPS: - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->hop_limit = val; - retv = 0; - break; case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) @@ -1347,7 +1349,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct dst_entry *dst; if (optname == IPV6_UNICAST_HOPS) - val = np->hop_limit; + val = READ_ONCE(np->hop_limit); else val = np->mcast_hops; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 421264a69e97..4a7967623909 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb, hdr->payload_len = htons(len); hdr->nexthdr = proto; - hdr->hop_limit = inet6_sk(sk)->hop_limit; + hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit); hdr->saddr = *saddr; hdr->daddr = *daddr; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 553c8664e0a7..b554fd40bdc3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -500,7 +500,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, csum_partial(icmp6h, skb->len, 0)); - ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); + ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); -- cgit v1.2.3 From d986f52124e062753e33b6fe303be5904a997eac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:00 +0000 Subject: ipv6: lockless IPV6_MULTICAST_LOOP implementation Add inet6_{test|set|clear|assign}_bit() helpers. Note that I am using bits from inet->inet_flags, this might change in the future if we need more flags. 
While solving data-races accessing np->mc_loop, this patch also allows lockless accesses to np->mcast_hops to be implemented in the following patch. Also constify the sk_mc_loop() argument. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 18 ++++++++++++++---- include/net/inet_sock.h | 1 + include/net/sock.h | 2 +- net/core/sock.c | 4 ++-- net/ipv6/af_inet6.c | 2 +- net/ipv6/ipv6_sockglue.c | 18 ++++++++---------- net/ipv6/ndisc.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 8 ++------ 8 files changed, 30 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c2e087071384..68cf1ca94914 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -218,11 +218,9 @@ struct ipv6_pinfo { #if defined(__BIG_ENDIAN_BITFIELD) /* Packed in 16bits. */ __s16 mcast_hops:9; - __u16 __unused_2:6, - mc_loop:1; + __u16 __unused_2:7, #else - __u16 mc_loop:1, - __unused_2:6; + __u16 __unused_2:7; __s16 mcast_hops:9; #endif int ucast_oif; @@ -283,6 +281,18 @@ struct ipv6_pinfo { struct inet6_cork cork; }; +/* We currently use available bits from inet_sk(sk)->inet_flags, + * this could change in the future. + */ +#define inet6_test_bit(nr, sk) \ + test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_set_bit(nr, sk) \ + set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_clear_bit(nr, sk) \ + clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_assign_bit(nr, sk, val) \ + assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) + /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock!
*/ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 2de0e4d4a027..b5a9dca92fb4 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -268,6 +268,7 @@ enum { INET_FLAGS_NODEFRAG = 17, INET_FLAGS_BIND_ADDRESS_NO_PORT = 18, INET_FLAGS_DEFER_CONNECT = 19, + INET_FLAGS_MC6_LOOP = 20, }; /* cmsg flags for inet */ diff --git a/include/net/sock.h b/include/net/sock.h index 676146e9d181..56ac1abadea5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2238,7 +2238,7 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) } } -bool sk_mc_loop(struct sock *sk); +bool sk_mc_loop(const struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) { diff --git a/net/core/sock.c b/net/core/sock.c index bb89b88bc1e8..213a62ac13f2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -759,7 +759,7 @@ out: return ret; } -bool sk_mc_loop(struct sock *sk) +bool sk_mc_loop(const struct sock *sk) { if (dev_recursion_level()) return false; @@ -771,7 +771,7 @@ bool sk_mc_loop(struct sock *sk) return inet_test_bit(MC_LOOP, sk); #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - return inet6_sk(sk)->mc_loop; + return inet6_test_bit(MC6_LOOP, sk); #endif } WARN_ON_ONCE(1); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 368824fe9719..bbd4aa1b96d0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -217,7 +217,7 @@ lookup_protocol: inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; - np->mc_loop = 1; + inet6_set_bit(MC6_LOOP, sk); np->mc_all = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index f27993a1470d..755fac85a120 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -424,6 +424,13 @@ int 
do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; WRITE_ONCE(np->hop_limit, val); return 0; + case IPV6_MULTICAST_LOOP: + if (optlen < sizeof(int)) + return -EINVAL; + if (val != valbool) + return -EINVAL; + inet6_assign_bit(MC6_LOOP, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -755,15 +762,6 @@ done: retv = 0; break; - case IPV6_MULTICAST_LOOP: - if (optlen < sizeof(int)) - goto e_inval; - if (val != valbool) - goto e_inval; - np->mc_loop = valbool; - retv = 0; - break; - case IPV6_UNICAST_IF: { struct net_device *dev = NULL; @@ -1367,7 +1365,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_MULTICAST_LOOP: - val = np->mc_loop; + val = inet6_test_bit(MC6_LOOP, sk); break; case IPV6_MULTICAST_IF: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b554fd40bdc3..679443d7ecb5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1996,7 +1996,7 @@ static int __net_init ndisc_net_init(struct net *net) np = inet6_sk(sk); np->hop_limit = 255; /* Do not loopback ndisc messages */ - np->mc_loop = 0; + inet6_clear_bit(MC6_LOOP, sk); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index da5af28ff57b..3c2251cabd04 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1298,17 +1298,13 @@ static void set_sock_size(struct sock *sk, int mode, int val) static void set_mcast_loop(struct sock *sk, u_char loop) { /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ - lock_sock(sk); inet_assign_bit(MC_LOOP, sk, loop); #ifdef CONFIG_IP_VS_IPV6 - if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - + if (READ_ONCE(sk->sk_family) == AF_INET6) { /* IPV6_MULTICAST_LOOP */ - np->mc_loop = loop ? 
1 : 0; + inet6_assign_bit(MC6_LOOP, sk, loop); } #endif - release_sock(sk); } /* -- cgit v1.2.3 From 2da23eb07c91241d962f3ff05565065484cd8929 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:01 +0000 Subject: ipv6: lockless IPV6_MULTICAST_HOPS implementation This fixes data-races around np->mcast_hops, and makes IPV6_MULTICAST_HOPS lockless. Note that np->mcast_hops is never negative, thus can fit a u8 field instead of s16. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 9 +-------- include/net/ipv6.h | 2 +- net/dccp/ipv6.c | 2 +- net/ipv6/ipv6_sockglue.c | 28 +++++++++++++++------------- net/ipv6/tcp_ipv6.c | 3 ++- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 6 files changed, 21 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 68cf1ca94914..9cc278b5e4f4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -214,15 +214,8 @@ struct ipv6_pinfo { __u32 frag_size; s16 hop_limit; + u8 mcast_hops; -#if defined(__BIG_ENDIAN_BITFIELD) - /* Packed in 16bits.
*/ - __s16 mcast_hops:9; - __u16 __unused_2:7, -#else - __u16 __unused_2:7; - __s16 mcast_hops:9; -#endif int ucast_oif; int mcast_oif; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2e8e7e31e02e..8a04a8985336 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -914,7 +914,7 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, int hlimit; if (ipv6_addr_is_multicast(&fl6->daddr)) - hlimit = np->mcast_hops; + hlimit = READ_ONCE(np->mcast_hops); else hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 33f6ccf6ba77..83617a16b98e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -676,7 +676,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) - np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (np->repflow) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 755fac85a120..5fff19a87c75 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -431,6 +431,16 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(MC6_LOOP, sk, valbool); return 0; + case IPV6_MULTICAST_HOPS: + if (sk->sk_type == SOCK_STREAM) + return retv; + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->mcast_hops, + val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -751,16 +761,6 @@ done: break; } - case IPV6_MULTICAST_HOPS: - if (sk->sk_type == SOCK_STREAM) - break; - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->mcast_hops = (val == -1 ? 
IPV6_DEFAULT_MCASTHOPS : val); - retv = 0; - break; case IPV6_UNICAST_IF: { @@ -1180,7 +1180,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { @@ -1197,7 +1198,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxflow) { @@ -1349,7 +1351,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_UNICAST_HOPS) val = READ_ONCE(np->hop_limit); else - val = np->mcast_hops; + val = READ_ONCE(np->mcast_hops); if (val < 0) { rcu_read_lock(); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3a88545a265d..54db5fab318b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1542,7 +1542,8 @@ ipv6_pktoptions: if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = tcp_v6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) - np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + WRITE_ONCE(np->mcast_hops, + ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (np->repflow) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3c2251cabd04..df1b33b61059 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1322,7 +1322,7 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl) struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_HOPS */ - np->mcast_hops = ttl; + WRITE_ONCE(np->mcast_hops, ttl); } 
#endif release_sock(sk); -- cgit v1.2.3 From dcae74622c051b219ee628669a31716473efda2c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:04 +0000 Subject: ipv6: lockless IPV6_RECVERR_RFC4884 implementation Move np->recverr_rfc4884 to an atomic flag to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 1 + net/ipv6/datagram.c | 2 +- net/ipv6/ipv6_sockglue.c | 17 ++++++++--------- 4 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 9cc278b5e4f4..0d2b0a1b2dae 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -256,7 +256,6 @@ struct ipv6_pinfo { autoflowlabel:1, autoflowlabel_set:1, mc_all:1, - recverr_rfc4884:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b5a9dca92fb4..8cf1f7b44234 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -269,6 +269,7 @@ enum { INET_FLAGS_BIND_ADDRESS_NO_PORT = 18, INET_FLAGS_DEFER_CONNECT = 19, INET_FLAGS_MC6_LOOP = 20, + INET_FLAGS_RECVERR6_RFC4884 = 21, }; /* cmsg flags for inet */ diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 41ebc4e57473..e81892814935 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -332,7 +332,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __skb_pull(skb, payload - skb->data); - if (inet6_sk(sk)->recverr_rfc4884) + if (inet6_test_bit(RECVERR6_RFC4884, sk)) ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); skb_reset_transport_header(skb); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index bbc8a009e05d..b65e73ac2ccd 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -462,6 +462,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, */ WRITE_ONCE(np->min_hopcount, val); return 0; + case 
IPV6_RECVERR_RFC4884: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < 0 || val > 1) + return -EINVAL; + inet6_assign_bit(RECVERR6_RFC4884, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -974,14 +981,6 @@ done: np->rxopt.bits.recvfragsize = valbool; retv = 0; break; - case IPV6_RECVERR_RFC4884: - if (optlen < sizeof(int)) - goto e_inval; - if (val < 0 || val > 1) - goto e_inval; - np->recverr_rfc4884 = valbool; - retv = 0; - break; } unlock: @@ -1462,7 +1461,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_RECVERR_RFC4884: - val = np->recverr_rfc4884; + val = inet6_test_bit(RECVERR6_RFC4884, sk); break; default: -- cgit v1.2.3 From 6559c0ff3bc27d7e4d447d31c1d7e8eae0e959f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:05 +0000 Subject: ipv6: lockless IPV6_MULTICAST_ALL implementation Move np->mc_all to an atomic flag to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 1 + net/ipv6/af_inet6.c | 2 +- net/ipv6/ipv6_sockglue.c | 14 ++++++-------- net/ipv6/mcast.c | 2 +- 5 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0d2b0a1b2dae..d88e91b7f0a3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -255,7 +255,6 @@ struct ipv6_pinfo { dontfrag:1, autoflowlabel:1, autoflowlabel_set:1, - mc_all:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 8cf1f7b44234..97e70a97dae8 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -270,6 +270,7 @@ enum { INET_FLAGS_DEFER_CONNECT = 19, INET_FLAGS_MC6_LOOP = 20, INET_FLAGS_RECVERR6_RFC4884 = 21, + INET_FLAGS_MC6_ALL = 22, }; /* cmsg flags for inet */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bbd4aa1b96d0..372fb7b9112c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -218,7 +218,7 @@ lookup_protocol: np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; inet6_set_bit(MC6_LOOP, sk); - np->mc_all = 1; + inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index b65e73ac2ccd..7a181831f226 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -469,6 +469,11 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(RECVERR6_RFC4884, sk, valbool); return 0; + case IPV6_MULTICAST_ALL: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(MC6_ALL, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -890,13 +895,6 @@ done: retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } - case IPV6_MULTICAST_ALL: - if 
(optlen < sizeof(int)) - goto e_inval; - np->mc_all = valbool; - retv = 0; - break; - case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: if (in_compat_syscall()) @@ -1372,7 +1370,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MULTICAST_ALL: - val = np->mc_all; + val = inet6_test_bit(MC6_ALL, sk); break; case IPV6_UNICAST_IF: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 4a7967623909..99e28b444a4c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -642,7 +642,7 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, } if (!mc) { rcu_read_unlock(); - return np->mc_all; + return inet6_test_bit(MC6_ALL, sk); } psl = rcu_dereference(mc->sflist); if (!psl) { -- cgit v1.2.3 From 5121516b0c4736b7977d977b239e36d23ec64401 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:06 +0000 Subject: ipv6: lockless IPV6_AUTOFLOWLABEL implementation Move np->autoflowlabel and np->autoflowlabel_set in inet->inet_flags, to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 2 -- include/net/inet_sock.h | 2 ++ include/net/ipv6.h | 2 +- net/ipv6/ip6_output.c | 12 +++++------- net/ipv6/ipv6_sockglue.c | 11 +++++------ 5 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index d88e91b7f0a3..e3be5dc21b7d 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -253,8 +253,6 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1, - autoflowlabel_set:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 97e70a97dae8..f1af64a40673 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -271,6 +271,8 @@ enum { INET_FLAGS_MC6_LOOP = 20, INET_FLAGS_RECVERR6_RFC4884 = 21, INET_FLAGS_MC6_ALL = 22, + INET_FLAGS_AUTOFLOWLABEL_SET = 23, + INET_FLAGS_AUTOFLOWLABEL = 24, }; /* cmsg flags for inet */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8a04a8985336..4b6cbec059e2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -428,7 +428,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); +bool ip6_autoflowlabel(struct net *net, const struct sock *sk); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ab7ede4a731a..47aa42f93ccd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -232,12 +232,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(ip6_output); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct sock *sk) { - if (!np->autoflowlabel_set) + if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk)) return 
ip6_default_np_autolabel(net); - else - return np->autoflowlabel; + return inet6_test_bit(AUTOFLOWLABEL, sk); } /* @@ -314,7 +313,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1938,7 +1937,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr *final_dst; - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; @@ -1981,7 +1979,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 7a181831f226..d5d428a695f7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -474,6 +474,10 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(MC6_ALL, sk, valbool); return 0; + case IPV6_AUTOFLOWLABEL: + inet6_assign_bit(AUTOFLOWLABEL, sk, valbool); + inet6_set_bit(AUTOFLOWLABEL_SET, sk); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -970,11 +974,6 @@ done: np->dontfrag = valbool; retv = 0; break; - case IPV6_AUTOFLOWLABEL: - np->autoflowlabel = valbool; - np->autoflowlabel_set = 1; - retv = 0; - break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; @@ -1447,7 +1446,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_AUTOFLOWLABEL: - val = ip6_autoflowlabel(sock_net(sk), np); + val = ip6_autoflowlabel(sock_net(sk), sk); break; case 
IPV6_RECVFRAGSIZE: -- cgit v1.2.3 From 1086ca7cce292bb498d7f8f85f4593c9ef4902b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:07 +0000 Subject: ipv6: lockless IPV6_DONTFRAG implementation Move np->dontfrag flag to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 1 + include/net/ipv6.h | 6 +++--- include/net/xfrm.h | 2 +- net/ipv6/icmp.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 9 ++++----- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- 11 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e3be5dc21b7d..57d563f1d4b1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -252,7 +252,6 @@ struct ipv6_pinfo { * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1, rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f1af64a40673..ac75324e9e1e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -273,6 +273,7 @@ enum { INET_FLAGS_MC6_ALL = 22, INET_FLAGS_AUTOFLOWLABEL_SET = 23, INET_FLAGS_AUTOFLOWLABEL = 24, + INET_FLAGS_DONTFRAG = 25, }; /* cmsg flags for inet */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4b6cbec059e2..5a1f2993680d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -373,12 +373,12 @@ static inline void ipcm6_init(struct ipcm6_cookie *ipc6) } static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, - const struct ipv6_pinfo *np) + const struct sock *sk) { *ipc6 = (struct ipcm6_cookie) { .hlimit = -1, - .tclass = np->tclass, - .dontfrag = np->dontfrag, + .tclass = inet6_sk(sk)->tclass, + .dontfrag = inet6_test_bit(DONTFRAG, sk), }; } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 
363c7d510554..98d7aa78adda 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2166,7 +2166,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk) proto = sk->sk_protocol; if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; + return inet6_test_bit(DONTFRAG, sk); return false; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 93a594a901d1..8fb4a791881a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -588,7 +588,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); @@ -791,7 +791,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) msg.offset = 0; msg.type = type; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ipc6.sockc.mark = mark; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 47aa42f93ccd..8851fe5d45a0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -2092,7 +2092,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, return ERR_PTR(err); } if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_sk(sk)->dontfrag; + ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, &current->task_frag, getfrag, from, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d5d428a695f7..33dd4dd872e6 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -478,6 +478,9 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, inet6_assign_bit(AUTOFLOWLABEL, sk, valbool); inet6_set_bit(AUTOFLOWLABEL_SET, sk); return 0; + case IPV6_DONTFRAG: + inet6_assign_bit(DONTFRAG, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -970,10 +973,6 @@ done: goto e_inval; retv =
__ip6_sock_set_addr_preferences(sk, val); break; - case IPV6_DONTFRAG: - np->dontfrag = valbool; - retv = 0; - break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; @@ -1442,7 +1441,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_DONTFRAG: - val = np->dontfrag; + val = inet6_test_bit(DONTFRAG, sk); break; case IPV6_AUTOFLOWLABEL: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 5831aaa53d75..4444b61eb23b 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -118,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if)) return -EINVAL; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 42fcec3ecf5e..cc9673c1809f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -898,7 +898,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f60ba4295435..e4301500741a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1595,7 +1595,7 @@ back_from_confirm: do_append_data: if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, (struct rt6_info *)dst, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ed8ebb6f5909..40af2431e73a 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -621,7 +621,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if 
(ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; -- cgit v1.2.3 From 3fa29971c69519629370b119b0b618ee88ade6b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:08 +0000 Subject: ipv6: lockless IPV6_RECVERR implementation np->recverr is moved to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +-- include/net/inet_sock.h | 1 + include/net/ipv6.h | 4 +--- net/dccp/ipv6.c | 2 +- net/ipv4/ping.c | 2 +- net/ipv6/datagram.c | 6 ++---- net/ipv6/ipv6_sockglue.c | 17 ++++++++--------- net/ipv6/raw.c | 10 +++++----- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 6 +++--- net/sctp/ipv6.c | 4 +--- 11 files changed, 25 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 57d563f1d4b1..53f4f1b97a78 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,8 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 recverr:1, - sndflow:1, + __u16 sndflow:1, repflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index ac75324e9e1e..3b79bc759ff4 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -274,6 +274,7 @@ enum { INET_FLAGS_AUTOFLOWLABEL_SET = 23, INET_FLAGS_AUTOFLOWLABEL = 24, INET_FLAGS_DONTFRAG = 25, + INET_FLAGS_RECVERR6 = 26, }; /* cmsg flags for inet */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5a1f2993680d..bd115980809f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1303,9 +1303,7 @@ static inline int ip6_sock_set_v6only(struct sock *sk) static inline void ip6_sock_set_recverr(struct sock *sk) { - lock_sock(sk); - inet6_sk(sk)->recverr = true; - release_sock(sk); + inet6_set_bit(RECVERR6, sk); } static inline int __ip6_sock_set_addr_preferences(struct sock
*sk, int val) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 83617a16b98e..e6c3d84c2b9e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -185,7 +185,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 75e0aee35eb7..bc01ad5fc01a 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -581,7 +581,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) * 4.1.3.3. */ if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) || - (family == AF_INET6 && !inet6_sk(sk)->recverr)) { + (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e81892814935..74673a5eff31 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -305,11 +305,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb, void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { - struct ipv6_pinfo *np = inet6_sk(sk); struct icmp6hdr *icmph = icmp6_hdr(skb); struct sock_exterr_skb *serr; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = skb_clone(skb, GFP_ATOMIC); @@ -344,12 +343,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { - const struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct ipv6hdr *iph; struct sk_buff *skb; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 33dd4dd872e6..ec10b45c49c1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -481,6 +481,13 @@ int 
do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DONTFRAG: inet6_assign_bit(DONTFRAG, sk, valbool); return 0; + case IPV6_RECVERR: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RECVERR6, sk, valbool); + if (!val) + skb_errqueue_purge(&sk->sk_error_queue); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -943,14 +950,6 @@ done: np->pmtudisc = val; retv = 0; break; - case IPV6_RECVERR: - if (optlen < sizeof(int)) - goto e_inval; - np->recverr = valbool; - if (!val) - skb_errqueue_purge(&sk->sk_error_queue); - retv = 0; - break; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) goto e_inval; @@ -1380,7 +1379,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_RECVERR: - val = np->recverr; + val = inet6_test_bit(RECVERR6, sk); break; case IPV6_FLOWINFO_SEND: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cc9673c1809f..71f6bdccfa1f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; @@ -300,7 +301,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, 2. Socket is connected (otherwise the error indication is useless without recverr and error is hard. 
*/ - if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) + if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; harderr = icmpv6_err_convert(type, code, &err); @@ -312,14 +313,14 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_redirect(skb, sk); return; } - if (np->recverr) { + if (recverr) { u8 *payload = skb->data; if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } - if (np->recverr || harderr) { + if (recverr || harderr) { sk->sk_err = err; sk_error_report(sk); } @@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags, const struct sockcm_cookie *sockc) { - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; @@ -668,7 +668,7 @@ out: error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); error_check: - if (err == -ENOBUFS && !np->recverr) + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) err = 0; return err; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 54db5fab318b..b5954b136b57 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -508,7 +508,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tcp_ld_RTO_revert(sk, seq); } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e4301500741a..90e873689b88 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -619,7 +619,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!np->recverr) { + if (!inet6_test_bit(RECVERR6, sk)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { @@ -1283,7 +1283,7 @@ csum_partial: send: err = ip6_send_skb(skb); if (err) { - if (err == -ENOBUFS && 
!inet6_sk(sk)->recverr) { + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; @@ -1608,7 +1608,7 @@ do_append_data: up->pending = 0; if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; + err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0; release_sock(sk); out: diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 43f2731bf590..42b5b853ea01 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -128,7 +128,6 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, { struct sctp_association *asoc = t->asoc; struct sock *sk = asoc->base.sk; - struct ipv6_pinfo *np; int err = 0; switch (type) { @@ -149,9 +148,8 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, break; } - np = inet6_sk(sk); icmpv6_err_convert(type, code, &err); - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { -- cgit v1.2.3 From 3cccda8db2cf2f2a224d55d5b6e2251d478c58ca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:09 +0000 Subject: ipv6: move np->repflow to atomic flags Move np->repflow to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 1 - include/net/inet_sock.h | 1 + net/dccp/ipv6.c | 2 +- net/ipv6/af_inet6.c | 3 ++- net/ipv6/ip6_flowlabel.c | 8 ++++---- net/ipv6/tcp_ipv6.c | 14 ++++++-------- 6 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 53f4f1b97a78..e62413371ea4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -244,7 +244,6 @@ struct ipv6_pinfo { /* sockopt flags */ __u16 sndflow:1, - repflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ srcprefs:3, /* 001: prefer temporary address diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 3b79bc759ff4..5d61c7dc6577 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -275,6 +275,7 @@ enum { INET_FLAGS_AUTOFLOWLABEL = 24, INET_FLAGS_DONTFRAG = 25, INET_FLAGS_RECVERR6 = 26, + INET_FLAGS_REPFLOW = 27, }; /* cmsg flags for inet */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e6c3d84c2b9e..d7e63eea705d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -679,7 +679,7 @@ ipv6_pktoptions: WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 372fb7b9112c..48737363377f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -220,7 +220,8 @@ lookup_protocol: inet6_set_bit(MC6_LOOP, sk); inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; - np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; + inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect & + FLOWLABEL_REFLECT_ESTABLISHED); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; sk->sk_txrehash = 
READ_ONCE(net->core.sysctl_txrehash); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b3ca4beb4405..eca07e10e21f 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -513,7 +513,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, return 0; } - if (np->repflow) { + if (inet6_test_bit(REPFLOW, sk)) { freq->flr_label = np->flow_label; return 0; } @@ -551,10 +551,10 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) if (freq->flr_flags & IPV6_FL_F_REFLECT) { if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; - if (!np->repflow) + if (!inet6_test_bit(REPFLOW, sk)) return -ESRCH; np->flow_label = 0; - np->repflow = 0; + inet6_clear_bit(REPFLOW, sk); return 0; } @@ -626,7 +626,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; - np->repflow = 1; + inet6_set_bit(REPFLOW, sk); return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b5954b136b57..201caf88bb99 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -548,7 +548,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; - if (np->repflow && ireq->pktopts) + if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 
@@ -797,7 +797,7 @@ static void tcp_v6_init_req(struct request_sock *req, (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || - np->rxopt.bits.rxohlim || np->repflow)) { + np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { refcount_inc(&skb->users); ireq->pktopts = skb; } @@ -1055,10 +1055,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { - const struct ipv6_pinfo *np = tcp_inet6_sk(sk); - trace_tcp_send_reset(sk, skb); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); priority = sk->sk_priority; txhash = sk->sk_txhash; @@ -1247,7 +1245,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; newnp->rcv_flowinfo = 0; - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = 0; /* @@ -1320,7 +1318,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* Set ToS of the new socket based upon the value of incoming SYN. 
@@ -1546,7 +1544,7 @@ ipv6_pktoptions: ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { tcp_v6_restore_cb(opt_skb); -- cgit v1.2.3 From 83cd5eb654b320c1972254f243531f3f3cebcccf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:10 +0000 Subject: ipv6: lockless IPV6_ROUTER_ALERT_ISOLATE implementation Reads from np->rtalert_isolate are racy. Move this flag to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +-- include/net/inet_sock.h | 1 + net/ipv6/ip6_output.c | 3 +-- net/ipv6/ipv6_sockglue.c | 13 ++++++------- 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e62413371ea4..f288a35f157f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -246,11 +246,10 @@ struct ipv6_pinfo { __u16 sndflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ - srcprefs:3, /* 001: prefer temporary address + srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ - rtalert_isolate:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 5d61c7dc6577..befee0f66c05 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -276,6 +276,7 @@ enum { INET_FLAGS_DONTFRAG = 25, INET_FLAGS_RECVERR6 = 26, INET_FLAGS_REPFLOW = 27, + INET_FLAGS_RTALERT_ISOLATE = 28, }; /* cmsg flags for inet */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8851fe5d45a0..f87d8491d7e2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -368,9 +368,8 @@ static int 
ip6_call_ra_chain(struct sk_buff *skb, int sel) if (sk && ra->sel == sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { - struct ipv6_pinfo *np = inet6_sk(sk); - if (np && np->rtalert_isolate && + if (inet6_test_bit(RTALERT_ISOLATE, sk) && !net_eq(sock_net(sk), dev_net(skb->dev))) { continue; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ec10b45c49c1..c22a492e0536 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -488,6 +488,11 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (!val) skb_errqueue_purge(&sk->sk_error_queue); return 0; + case IPV6_ROUTER_ALERT_ISOLATE: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -936,12 +941,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_ROUTER_ALERT_ISOLATE: - if (optlen < sizeof(int)) - goto e_inval; - np->rtalert_isolate = valbool; - retv = 0; - break; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; @@ -1452,7 +1451,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_ROUTER_ALERT_ISOLATE: - val = np->rtalert_isolate; + val = inet6_test_bit(RTALERT_ISOLATE, sk); break; case IPV6_RECVERR_RFC4884: -- cgit v1.2.3 From 6b724bc4300b431443f3b99520994a5aece347cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:11 +0000 Subject: ipv6: lockless IPV6_MTU_DISCOVER implementation Most np->pmtudisc reads are racy. Move this 3bit field on a full byte, add annotations and make IPV6_MTU_DISCOVER setsockopt() lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 5 ++--- include/net/ip6_route.h | 14 +++++++++----- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 17 ++++++++--------- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 7 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f288a35f157f..10f521a6a9c8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,13 +243,12 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 sndflow:1, - pmtudisc:3, - padding:1, /* 1 bit hole */ + __u8 sndflow:1, srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ + __u8 pmtudisc; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b32539bb0fb0..b1ea49900b4a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -266,7 +266,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) const struct dst_entry *dst = skb_dst(skb); unsigned int mtu; - if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { + if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) { mtu = READ_ONCE(dst->dev->mtu); mtu -= lwtunnel_headroom(dst->lwtstate, mtu); } else { @@ -277,14 +277,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && - inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc != IPV6_PMTUDISC_INTERFACE && + pmtudisc != IPV6_PMTUDISC_OMIT; } static inline bool ip6_sk_ignore_df(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || - inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc < IPV6_PMTUDISC_DO || + pmtudisc == 
IPV6_PMTUDISC_OMIT; } static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f87d8491d7e2..7e5d9eeb990f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1436,10 +1436,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); frag_size = READ_ONCE(np->frag_size); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c22a492e0536..85ea42644dcb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -493,6 +493,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); return 0; + case IPV6_MTU_DISCOVER: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) + return -EINVAL; + WRITE_ONCE(np->pmtudisc, val); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -941,14 +948,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_MTU_DISCOVER: - if (optlen < sizeof(int)) - goto e_inval; - if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) - goto e_inval; - np->pmtudisc = val; - retv = 0; - break; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) goto e_inval; @@ -1374,7 +1373,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MTU_DISCOVER: - val = np->pmtudisc; + val = READ_ONCE(np->pmtudisc); break; case IPV6_RECVERR: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 71f6bdccfa1f..47372cceb98f 100644 --- 
a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -307,7 +307,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, harderr = icmpv6_err_convert(type, code, &err); if (type == ICMPV6_PKT_TOOBIG) { ip6_sk_update_pmtu(skb, sk, info); - harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); + harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO); } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 90e873689b88..c17e19fece1b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -598,7 +598,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); - if (np->pmtudisc != IPV6_PMTUDISC_DONT) + if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT) harderr = 1; } if (type == NDISC_REDIRECT) { diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index df1b33b61059..5820a8156c47 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1341,7 +1341,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MTU_DISCOVER */ - np->pmtudisc = val; + WRITE_ONCE(np->pmtudisc, val); } #endif release_sock(sk); -- cgit v1.2.3 From 859f8b265fc2a11af0fb0c52b4087e0409250592 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:12 +0000 Subject: ipv6: lockless IPV6_FLOWINFO_SEND implementation np->sndflow reads are racy. Use one bit from atomic inet->inet_flags instead, IPV6_FLOWINFO_SEND setsockopt() can be lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 3 +-- include/net/inet_sock.h | 1 + net/dccp/ipv6.c | 2 +- net/ipv4/ping.c | 3 +-- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 7 ++++--- net/ipv6/ipv6_sockglue.c | 13 ++++++------- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- net/l2tp/l2tp_ip6.c | 4 ++-- net/sctp/ipv6.c | 3 ++- 13 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 10f521a6a9c8..09253825c99c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,8 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u8 sndflow:1, - srcprefs:3; /* 001: prefer temporary address + __u8 srcprefs:3; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index befee0f66c05..98e11958cdff 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -277,6 +277,7 @@ enum { INET_FLAGS_RECVERR6 = 26, INET_FLAGS_REPFLOW = 27, INET_FLAGS_RTALERT_ISOLATE = 28, + INET_FLAGS_SNDFLOW = 29, }; /* cmsg flags for inet */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d7e63eea705d..4803f0614848 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -844,7 +844,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bc01ad5fc01a..4dd809b7b188 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -899,7 +899,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *ip6 = ipv6_hdr(skb); 
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -908,7 +907,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, sin6->sin6_port = 0; sin6->sin6_addr = ip6->saddr; sin6->sin6_flowinfo = 0; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin6->sin6_flowinfo = ip6_flowinfo(ip6); sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 48737363377f..c6ad0d6e99b5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -537,7 +537,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin6_port = inet->inet_dport; sin->sin6_addr = sk->sk_v6_daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, CGROUP_INET6_GETPEERNAME); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 74673a5eff31..cc6a502db39d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -80,7 +80,8 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) struct flowi6 fl6; int err = 0; - if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) { + if (inet6_test_bit(SNDFLOW, sk) && + (np->flow_label & IPV6_FLOWLABEL_MASK)) { flowlabel = fl6_sock_lookup(sk, np->flow_label); if (IS_ERR(flowlabel)) return -EINVAL; @@ -163,7 +164,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (ipv6_addr_any(&usin->sin6_addr)) { @@ -491,7 +492,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), struct ipv6hdr, daddr); sin->sin6_addr = ip6h->daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = ip6_flowinfo(ip6h); 
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 85ea42644dcb..e9dc6f881bb9 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -500,6 +500,11 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; WRITE_ONCE(np->pmtudisc, val); return 0; + case IPV6_FLOWINFO_SEND: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(SNDFLOW, sk, valbool); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -948,12 +953,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_FLOWINFO_SEND: - if (optlen < sizeof(int)) - goto e_inval; - np->sndflow = valbool; - retv = 0; - break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); break; @@ -1381,7 +1380,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_FLOWINFO_SEND: - val = np->sndflow; + val = inet6_test_bit(SNDFLOW, sk); break; case IPV6_FLOWLABEL_MGR: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 4444b61eb23b..e8fb0d275cc2 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -89,7 +89,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; } daddr = &(u->sin6_addr); - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr))) oif = u->sin6_scope_id; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 47372cceb98f..a2aa54a2baae 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -795,7 +795,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 
201caf88bb99..94afb8d0f2d0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -163,7 +163,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c17e19fece1b..5e9312eefed0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1429,7 +1429,7 @@ do_udp_sendmsg: fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 40af2431e73a..44cfb72bbd18 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -431,7 +431,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, return -ENOTCONN; lsa->l2tp_conn_id = lsk->peer_conn_id; lsa->l2tp_addr = sk->sk_v6_daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) lsa->l2tp_flowinfo = np->flow_label; } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -529,7 +529,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; daddr = &lsa->l2tp_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK; if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 42b5b853ea01..5c0ed5909d85 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -296,7 +296,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK) fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK); - if 
(np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { + if (inet6_test_bit(SNDFLOW, sk) && + (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); -- cgit v1.2.3 From 0a596b0682a7ce37e26c36629816f105c6459d06 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2023 16:36:41 +0800 Subject: KEYS: Include linux/errno.h in linux/verification.h Add inclusion of linux/errno.h as otherwise the reference to EINVAL may be invalid. Fixes: f3cf4134c5c6 ("bpf: Add bpf_lookup_*_key() and bpf_key_put() kfuncs") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308261414.HKw1Mrip-lkp@intel.com/ Signed-off-by: Herbert Xu --- include/linux/verification.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/verification.h b/include/linux/verification.h index f34e50ebcf60..cb2d47f28091 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -8,6 +8,7 @@ #ifndef _LINUX_VERIFICATION_H #define _LINUX_VERIFICATION_H +#include <linux/errno.h> #include /* -- cgit v1.2.3 From b58a36008bfa1aadf55f516bcbfae40c779eb54b Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 6 Sep 2023 01:27:57 +0200 Subject: hwrng: bcm2835 - Fix hwrng throughput regression The last RCU stall fix caused a massive throughput regression of the hwrng on Raspberry Pi 0 - 3. hwrng_msleep doesn't sleep precisely enough and usleep_range doesn't allow scheduling. So try to restore the best possible throughput by introducing hwrng_yield which sleeps interruptibly for one jiffy. 
Some performance measurements on Raspberry Pi 3B+ (arm64/defconfig): sudo dd if=/dev/hwrng of=/dev/null count=1 bs=10000 cpu_relax ~138025 Bytes / sec hwrng_msleep(1000) ~13 Bytes / sec hwrng_yield ~2510 Bytes / sec Fixes: 96cb9d055445 ("hwrng: bcm2835 - use hwrng_msleep() instead of cpu_relax()") Link: https://lore.kernel.org/linux-arm-kernel/bc97ece5-44a3-4c4e-77da-2db3eb66b128@gmx.net/ Signed-off-by: Stefan Wahren Reviewed-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- drivers/char/hw_random/bcm2835-rng.c | 2 +- drivers/char/hw_random/core.c | 6 ++++++ include/linux/hw_random.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c index eb04b12f9f01..b03e80300627 100644 --- a/drivers/char/hw_random/bcm2835-rng.c +++ b/drivers/char/hw_random/bcm2835-rng.c @@ -70,7 +70,7 @@ static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max, while ((rng_readl(priv, RNG_STATUS) >> 24) == 0) { if (!wait) return 0; - hwrng_msleep(rng, 1000); + hwrng_yield(rng); } num_words = rng_readl(priv, RNG_STATUS) >> 24; diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index e3598ec9cfca..420f155d251f 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -678,6 +678,12 @@ long hwrng_msleep(struct hwrng *rng, unsigned int msecs) } EXPORT_SYMBOL_GPL(hwrng_msleep); +long hwrng_yield(struct hwrng *rng) +{ + return wait_for_completion_interruptible_timeout(&rng->dying, 1); +} +EXPORT_SYMBOL_GPL(hwrng_yield); + static int __init hwrng_modinit(void) { int ret; diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 8a3115516a1b..136e9842120e 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -63,5 +63,6 @@ extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); extern long hwrng_msleep(struct hwrng *rng, 
unsigned int msecs); +extern long hwrng_yield(struct hwrng *rng); #endif /* LINUX_HWRANDOM_H_ */ -- cgit v1.2.3 From b1f099b1cf51d553c510c6c8141c27d9ba7ea1fe Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 19 Aug 2023 07:12:33 -0700 Subject: numa: Generalize numa_map_to_online_node() The function in fact searches the nearest node for a given one, based on a N_ONLINE state. This is a common pattern to search for a nearest node. This patch converts numa_map_to_online_node() to numa_nearest_node() so that others won't need to opencode the logic. Signed-off-by: Yury Norov Signed-off-by: Ingo Molnar Cc: Mel Gorman Link: https://lore.kernel.org/r/20230819141239.287290-2-yury.norov@gmail.com --- include/linux/numa.h | 7 +++++-- mm/mempolicy.c | 18 +++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051f..fb30a42f0700 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -25,7 +25,7 @@ #include /* Generic implementation available */ -int numa_map_to_online_node(int node); +int numa_nearest_node(int node, unsigned int state); #ifndef memory_add_physaddr_to_nid static inline int memory_add_physaddr_to_nid(u64 start) @@ -44,10 +44,11 @@ static inline int phys_to_target_node(u64 start) } #endif #else /* !CONFIG_NUMA */ -static inline int numa_map_to_online_node(int node) +static inline int numa_nearest_node(int node, unsigned int state) { return NUMA_NO_NODE; } + static inline int memory_add_physaddr_to_nid(u64 start) { return 0; @@ -58,6 +59,8 @@ static inline int phys_to_target_node(u64 start) } #endif +#define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE) + #ifdef CONFIG_HAVE_ARCH_NODE_DEV_GROUP extern const struct attribute_group arch_node_dev_group; #endif diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 42b5567e3773..d4c0fff79758 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -131,22 +131,26 @@ static struct 
mempolicy default_policy = { static struct mempolicy preferred_node_policy[MAX_NUMNODES]; /** - * numa_map_to_online_node - Find closest online node + * numa_nearest_node - Find nearest node by state * @node: Node id to start the search + * @state: State to filter the search * - * Lookup the next closest node by distance if @nid is not online. + * Lookup the closest node by distance if @nid is not in state. * - * Return: this @node if it is online, otherwise the closest node by distance + * Return: this @node if it is in state, otherwise the closest node by distance */ -int numa_map_to_online_node(int node) +int numa_nearest_node(int node, unsigned int state) { int min_dist = INT_MAX, dist, n, min_node; - if (node == NUMA_NO_NODE || node_online(node)) + if (state >= NR_NODE_STATES) + return -EINVAL; + + if (node == NUMA_NO_NODE || node_state(node, state)) return node; min_node = node; - for_each_online_node(n) { + for_each_node_state(n, state) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; @@ -156,7 +160,7 @@ int numa_map_to_online_node(int node) return min_node; } -EXPORT_SYMBOL_GPL(numa_map_to_online_node); +EXPORT_SYMBOL_GPL(numa_nearest_node); struct mempolicy *get_task_policy(struct task_struct *p) { -- cgit v1.2.3 From 8ab63d418d4339d996f80d02a00dbce0aa3ff972 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 19 Aug 2023 07:12:36 -0700 Subject: sched/topology: Fix sched_numa_find_nth_cpu() in non-NUMA case When CONFIG_NUMA is enabled, sched_numa_find_nth_cpu() searches for a CPU in sched_domains_numa_masks. The masks include only online CPUs, so effectively offline CPUs are skipped. When CONFIG_NUMA is disabled, the fallback function should be consistent. 
Fixes: cd7f55359c90 ("sched: add sched_numa_find_nth_cpu()") Signed-off-by: Yury Norov Signed-off-by: Ingo Molnar Cc: Mel Gorman Link: https://lore.kernel.org/r/20230819141239.287290-5-yury.norov@gmail.com --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index fea32377f7c7..52f5850730b3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -251,7 +251,7 @@ extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int #else static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) { - return cpumask_nth(cpu, cpus); + return cpumask_nth_and(cpu, cpus, cpu_online_mask); } static inline const struct cpumask * -- cgit v1.2.3 From 8f908db77782630c45ba29dac35c434b5ce0b730 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 15 Sep 2023 10:34:27 -0700 Subject: bpf: Fix BTF_ID symbol generation collision Marcus and Satya reported an issue where the BTF_ID macro generates the same symbol in separate objects and that breaks the final vmlinux link. ld.lld: error: ld-temp.o :14577:1: symbol '__BTF_ID__struct__cgroup__624' is already defined This can be triggered under specific configs when __COUNTER__ happens to be the same for the same symbol in two different translation units, which is already quite unlikely to happen. Add __LINE__ number suffix to make BTF_ID symbol more unique, which is not a complete fix, but it would help for now and meanwhile we can work on a better solution as suggested by Andrii. 
Cc: stable@vger.kernel.org Reported-by: Satya Durga Srinivasu Prabhala Reported-by: Marcus Seyfarth Closes: https://github.com/ClangBuiltLinux/linux/issues/1913 Debugged-by: Nathan Chancellor Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/ Signed-off-by: Jiri Olsa Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-1-263fc519c21f@google.com Signed-off-by: Alexei Starovoitov --- include/linux/btf_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index a3462a9b8e18..a9cb10b0e2e9 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -49,7 +49,7 @@ word \ ____BTF_ID(symbol, word) #define __ID(prefix) \ - __PASTE(prefix, __COUNTER__) + __PASTE(__PASTE(prefix, __COUNTER__), __LINE__) /* * The BTF_ID defines unique symbol for each ID pointing -- cgit v1.2.3 From 32e4fa37fa667fdf53499b9de92737dc75199d8e Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 4 Sep 2023 14:13:50 +0200 Subject: cpu/hotplug: Remove unused cpuhp_state CPUHP_AP_X86_VDSO_VMA_ONLINE Commit b2e2ba578e01 ("x86/vdso: Initialize the CPU/node NR segment descriptor earlier") removed the single user of this constant. Remove it to reduce the size of cpuhp_hp_states[]. 
Signed-off-by: Olaf Hering Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230904121350.18055-1-olaf@aepfle.de --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 06dda85f0424..cd8bd6ed04f9 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -205,7 +205,6 @@ enum cpuhp_state { CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, - CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_BLK_MQ_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, -- cgit v1.2.3 From c656a4d5484ad99e97de549a9affc12a91d94963 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 15 Sep 2023 15:46:08 -0400 Subject: Revert "SUNRPC: clean up integer overflow check" This reverts commit e87cf8a28e7592bd19064e8181324ae26bc02932. This commit was added to silence a tautological comparison warning, but removing the 'len' value check before calling xdr_inline_decode() is really not what we want. 
Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5b4fb3c791bc..896a6d2a9cf0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -779,7 +779,9 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - p = xdr_inline_decode(xdr, size_mul(len, sizeof(*p))); + if (len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; + p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) return -EBADMSG; if (array == NULL) -- cgit v1.2.3 From 993b5662f302628db4eb358d69b2720c88cbfaf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 13 Sep 2023 16:12:33 -0400 Subject: SUNRPC: Silence compiler complaints about tautological comparisons On 64-bit systems, the compiler will complain that the comparison between SIZE_MAX and the 32-bit unsigned int 'len' is unnecessary. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 896a6d2a9cf0..2f8dc47f1eb0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -779,7 +779,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - if (len > SIZE_MAX / sizeof(*p)) + if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) return -EBADMSG; p = xdr_inline_decode(xdr, len * sizeof(*p)); if (unlikely(!p)) -- cgit v1.2.3 From daabb2b098e04753fa3d1b1feed13e5a61bef61c Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 7 Sep 2023 23:05:49 +0000 Subject: bpf/tests: add tests for cpuv4 instructions The BPF JITs now support cpuv4 instructions. 
Add tests for these new instructions to the test suite: 1. Sign extended Load 2. Sign extended Mov 3. Unconditional byte swap 4. Unconditional jump with 32-bit offset 5. Signed division and modulo Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20230907230550.1417590-9-puranjay12@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 50 ++++++- lib/test_bpf.c | 371 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 417 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 761af6b3cf2b..0138832ad571 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -117,21 +117,25 @@ struct ctl_table_header; /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ -#define BPF_ALU64_IMM(OP, DST, IMM) \ +#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU64_IMM(OP, DST, IMM) \ + BPF_ALU64_IMM_OFF(OP, DST, IMM, 0) -#define BPF_ALU32_IMM(OP, DST, IMM) \ +#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU32_IMM(OP, DST, IMM) \ + BPF_ALU32_IMM_OFF(OP, DST, IMM, 0) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ @@ -143,6 +147,16 @@ struct ctl_table_header; .off = 0, \ .imm = LEN }) +/* Byte Swap, bswap16/32/64 */ + +#define BPF_BSWAP(DST, LEN) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = LEN }) + /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ @@ -179,6 +193,24 @@ struct ctl_table_header; .off = 0, \ .imm = IMM }) +/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */ + +#define BPF_MOVSX64_REG(DST, SRC, 
OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_MOVSX32_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Special form of mov32, used for doing explicit zero extension on dst. */ #define BPF_ZEXT_REG(DST) \ ((struct bpf_insn) { \ @@ -263,6 +295,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) +/* Memory load, dst_reg = *(signed size *) (src_reg + off16) */ + +#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ diff --git a/lib/test_bpf.c b/lib/test_bpf.c index ecde4216201e..7916503e6a6a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5111,6 +5111,104 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffffffff } } }, + /* MOVSX32 */ + { + "ALU_MOVSX | BPF_B", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x00000000ffffffefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX32_REG(R1, R3, 8), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU_MOVSX | BPF_H", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x00000000ffffbeefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX32_REG(R1, R3, 16), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU_MOVSX | BPF_W", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x00000000deadbeefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX32_REG(R1, R3, 32), + BPF_JMP_REG(BPF_JEQ, R2, R1, 
2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* MOVSX64 REG */ + { + "ALU64_MOVSX | BPF_B", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX64_REG(R1, R3, 8), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOVSX | BPF_H", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffbeefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX64_REG(R1, R3, 16), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOVSX | BPF_W", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffdeadbeefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX64_REG(R1, R3, 32), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, /* BPF_ALU | BPF_ADD | BPF_X */ { "ALU_ADD_X: 1 + 2 = 3", @@ -6105,6 +6203,106 @@ static struct bpf_test tests[] = { { }, { { 0, 2 } }, }, + /* BPF_ALU | BPF_DIV | BPF_X off=1 (SDIV) */ + { + "ALU_SDIV_X: -6 / 2 = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, -6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG_OFF(BPF_DIV, R0, R1, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_K off=1 (SDIV) */ + { + "ALU_SDIV_K: -6 / 2 = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, -6), + BPF_ALU32_IMM_OFF(BPF_DIV, R0, 2, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + /* BPF_ALU64 | BPF_DIV | BPF_X off=1 (SDIV64) */ + { + "ALU64_SDIV_X: -6 / 2 = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, -6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG_OFF(BPF_DIV, R0, R1, 1), + BPF_EXIT_INSN(), 
+ }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + /* BPF_ALU64 | BPF_DIV | BPF_K off=1 (SDIV64) */ + { + "ALU64_SDIV_K: -6 / 2 = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, -6), + BPF_ALU64_IMM_OFF(BPF_DIV, R0, 2, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_X off=1 (SMOD) */ + { + "ALU_SMOD_X: -7 % 2 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, -7), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG_OFF(BPF_MOD, R0, R1, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_K off=1 (SMOD) */ + { + "ALU_SMOD_K: -7 % 2 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, -7), + BPF_ALU32_IMM_OFF(BPF_MOD, R0, 2, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU64 | BPF_MOD | BPF_X off=1 (SMOD64) */ + { + "ALU64_SMOD_X: -7 % 2 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, -7), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG_OFF(BPF_MOD, R0, R1, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU64 | BPF_MOD | BPF_K off=1 (SMOD64) */ + { + "ALU64_SMOD_X: -7 % 2 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, -7), + BPF_ALU64_IMM_OFF(BPF_MOD, R0, 2, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, /* BPF_ALU | BPF_AND | BPF_X */ { "ALU_AND_X: 3 & 2 = 2", @@ -7837,6 +8035,104 @@ static struct bpf_test tests[] = { { }, { { 0, (u32) (cpu_to_le64(0xfedcba9876543210ULL) >> 32) } }, }, + /* BSWAP */ + { + "BSWAP 16: 0x0123456789abcdef -> 0xefcd", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_BSWAP(R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xefcd } }, + }, + { + "BSWAP 32: 0x0123456789abcdef -> 0xefcdab89", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_BSWAP(R0, 32), + BPF_ALU64_REG(BPF_MOV, R1, R0), + BPF_ALU64_IMM(BPF_RSH, R1, 32), + BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xefcdab89 } }, + }, + { + 
"BSWAP 64: 0x0123456789abcdef -> 0x67452301", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_BSWAP(R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x67452301 } }, + }, + { + "BSWAP 64: 0x0123456789abcdef >> 32 -> 0xefcdab89", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_BSWAP(R0, 64), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xefcdab89 } }, + }, + /* BSWAP, reversed */ + { + "BSWAP 16: 0xfedcba9876543210 -> 0x1032", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), + BPF_BSWAP(R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1032 } }, + }, + { + "BSWAP 32: 0xfedcba9876543210 -> 0x10325476", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), + BPF_BSWAP(R0, 32), + BPF_ALU64_REG(BPF_MOV, R1, R0), + BPF_ALU64_IMM(BPF_RSH, R1, 32), + BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x10325476 } }, + }, + { + "BSWAP 64: 0xfedcba9876543210 -> 0x98badcfe", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), + BPF_BSWAP(R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x98badcfe } }, + }, + { + "BSWAP 64: 0xfedcba9876543210 >> 32 -> 0x10325476", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), + BPF_BSWAP(R0, 64), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x10325476 } }, + }, /* BPF_LDX_MEM B/H/W/DW */ { "BPF_LDX_MEM | BPF_B, base", @@ -8228,6 +8524,67 @@ static struct bpf_test tests[] = { { { 32, 0 } }, .stack_depth = 0, }, + /* BPF_LDX_MEMSX B/H/W */ + { + "BPF_LDX_MEMSX | BPF_B", + .u.insns_int = { + BPF_LD_IMM64(R1, 0xdead0000000000f0ULL), + BPF_LD_IMM64(R2, 0xfffffffffffffff0ULL), + BPF_STX_MEM(BPF_DW, R10, R1, -8), +#ifdef __BIG_ENDIAN + BPF_LDX_MEMSX(BPF_B, R0, R10, -1), +#else + BPF_LDX_MEMSX(BPF_B, R0, R10, -8), +#endif + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), 
+ }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 8, + }, + { + "BPF_LDX_MEMSX | BPF_H", + .u.insns_int = { + BPF_LD_IMM64(R1, 0xdead00000000f123ULL), + BPF_LD_IMM64(R2, 0xfffffffffffff123ULL), + BPF_STX_MEM(BPF_DW, R10, R1, -8), +#ifdef __BIG_ENDIAN + BPF_LDX_MEMSX(BPF_H, R0, R10, -2), +#else + BPF_LDX_MEMSX(BPF_H, R0, R10, -8), +#endif + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 8, + }, + { + "BPF_LDX_MEMSX | BPF_W", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x00000000deadbeefULL), + BPF_LD_IMM64(R2, 0xffffffffdeadbeefULL), + BPF_STX_MEM(BPF_DW, R10, R1, -8), +#ifdef __BIG_ENDIAN + BPF_LDX_MEMSX(BPF_W, R0, R10, -4), +#else + BPF_LDX_MEMSX(BPF_W, R0, R10, -8), +#endif + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 8, + }, /* BPF_STX_MEM B/H/W/DW */ { "BPF_STX_MEM | BPF_B", @@ -9474,6 +9831,20 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP32 | BPF_JA */ + { + "JMP32_JA: Unconditional jump: if (true) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_JMP32_IMM(BPF_JA, 0, 1, 0), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JSLT | BPF_K */ { "JMP_JSLT_K: Signed jump: if (-2 < -1) return 1", -- cgit v1.2.3 From 3868ab0f192581eff978501a05f3dc2e01541d77 Mon Sep 17 00:00:00 2001 From: Aananth V Date: Thu, 14 Sep 2023 14:36:21 +0000 Subject: tcp: new TCP_INFO stats for RTO events The 2023 SIGCOMM paper "Improving Network Availability with Protective ReRoute" has indicated Linux TCP's RTO-triggered txhash rehashing can effectively reduce application disruption during outages. To better measure the efficacy of this feature, this patch adds three more detailed stats during RTO recovery and exports via TCP_INFO. 
Applications and monitoring systems can leverage this data to measure the network path diversity and end-to-end repair latency during network outages to improve their network infrastructure. The following counters are added to tcp_sock in order to track RTO events over the lifetime of a TCP socket. 1. u16 total_rto - Counts the total number of RTO timeouts. 2. u16 total_rto_recoveries - Counts the total number of RTO recoveries. 3. u32 total_rto_time - Counts the total time spent (ms) in RTO recoveries. (time spent in CA_Loss and CA_Recovery states) To compute total_rto_time, we add a new u32 rto_stamp field to tcp_sock. rto_stamp records the start timestamp (ms) of the last RTO recovery (CA_Loss). Corresponding fields are also added to the tcp_info struct. Signed-off-by: Aananth V Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 8 ++++++++ include/uapi/linux/tcp.h | 12 ++++++++++++ net/ipv4/tcp.c | 9 +++++++++ net/ipv4/tcp_input.c | 15 +++++++++++++++ net/ipv4/tcp_minisocks.c | 4 ++++ net/ipv4/tcp_timer.c | 17 +++++++++++++++-- 6 files changed, 63 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 44d946161d4a..e15452df9804 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -377,6 +377,14 @@ struct tcp_sock { * Total data bytes retransmitted */ u32 total_retrans; /* Total retransmits for entire connection */ + u32 rto_stamp; /* Start time (ms) of last CA_Loss recovery */ + u16 total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + u16 total_rto_recoveries; /* Total number of RTO recoveries, + * including any unfinished recovery. + */ + u32 total_rto_time; /* ms spent in (completed) RTO recoveries. 
*/ u32 urg_seq; /* Seq of received urgent pointer */ unsigned int keepalive_time; /* time before keep alive takes place */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 879eeb0a084b..d1d08da6331a 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -289,6 +289,18 @@ struct tcp_info { */ __u32 tcpi_rehash; /* PLB or timeout triggered rehash attempts */ + + __u16 tcpi_total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + __u16 tcpi_total_rto_recoveries; /* Total number of RTO + * recoveries, including any + * unfinished recovery. + */ + __u32 tcpi_total_rto_time; /* Total time spent in RTO recoveries + * in milliseconds, including any + * unfinished recovery. + */ }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0c3040a63ebd..69b8d7073708 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3818,6 +3818,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wnd = tp->rcv_wnd; info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash; info->tcpi_fastopen_client_fail = tp->fastopen_client_fail; + + info->tcpi_total_rto = tp->total_rto; + info->tcpi_total_rto_recoveries = tp->total_rto_recoveries; + info->tcpi_total_rto_time = tp->total_rto_time; + if (tp->rto_stamp) { + info->tcpi_total_rto_time += tcp_time_stamp_raw() - + tp->rto_stamp; + } + unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8d2c91703158..584825ddd0a0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2088,6 +2088,10 @@ void tcp_clear_retrans(struct tcp_sock *tp) tp->undo_marker = 0; tp->undo_retrans = -1; tp->sacked_out = 0; + tp->rto_stamp = 0; + tp->total_rto = 0; + tp->total_rto_recoveries = 0; + tp->total_rto_time = 0; } static inline void tcp_init_undo(struct tcp_sock *tp) @@ -2825,6 +2829,14 @@ void tcp_enter_recovery(struct sock 
*sk, bool ece_ack) tcp_set_ca_state(sk, TCP_CA_Recovery); } +static void tcp_update_rto_time(struct tcp_sock *tp) +{ + if (tp->rto_stamp) { + tp->total_rto_time += tcp_time_stamp(tp) - tp->rto_stamp; + tp->rto_stamp = 0; + } +} + /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are * recovered or spurious. Otherwise retransmits more on partial ACKs. */ @@ -3029,6 +3041,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, break; case TCP_CA_Loss: tcp_process_loss(sk, flag, num_dupack, rexmit); + if (icsk->icsk_ca_state != TCP_CA_Loss) + tcp_update_rto_time(tp); tcp_identify_packet_loss(sk, ack_flag); if (!(icsk->icsk_ca_state == TCP_CA_Open || (*ack_flag & FLAG_LOST_RETRANS))) @@ -6454,6 +6468,7 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) tcp_try_undo_recovery(sk); /* Reset rtx states to prevent spurious retransmits_timed_out() */ + tcp_update_rto_time(tp); tp->retrans_stamp = 0; inet_csk(sk)->icsk_retransmits = 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b98d476f1594..eee8ab1bfa0e 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -565,6 +565,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->undo_marker = treq->snt_isn; newtp->retrans_stamp = div_u64(treq->snt_synack, USEC_PER_SEC / TCP_TS_HZ); + newtp->total_rto = req->num_timeout; + newtp->total_rto_recoveries = 1; + newtp->total_rto_time = tcp_time_stamp_raw() - + newtp->retrans_stamp; } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d7d64682b068..3f61c6a70a1f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -415,6 +415,19 @@ abort: tcp_write_err(sk); } } +static void tcp_update_rto_stats(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + if (!icsk->icsk_retransmits) { + tp->total_rto_recoveries++; + tp->rto_stamp = 
tcp_time_stamp(tp); + } + icsk->icsk_retransmits++; + tp->total_rto++; +} + /* * Timer for Fast Open socket to retransmit SYNACK. Note that the * sk here is the child socket, not the parent (listener) socket. @@ -447,7 +460,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) */ inet_rtx_syn_ack(sk, req); req->num_timeout++; - icsk->icsk_retransmits++; + tcp_update_rto_stats(sk); if (!tp->retrans_stamp) tp->retrans_stamp = tcp_time_stamp(tp); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, @@ -575,7 +588,7 @@ void tcp_retransmit_timer(struct sock *sk) tcp_enter_loss(sk); - icsk->icsk_retransmits++; + tcp_update_rto_stats(sk); if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * Let senders fight for local resources conservatively. -- cgit v1.2.3 From 9af27da6313c8f8c6a26c7ea3fe23d6b9664a3a8 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:31:58 +0200 Subject: bpf: Use bpf_is_subprog to check for subprogs We would like to know whether a bpf_prog corresponds to the main prog or one of the subprogs. The current JIT implementations simply check this using the func_idx in bpf_prog->aux->func_idx. When the index is 0, it belongs to the main program, otherwise it corresponds to some subprogram. This will also be necessary to halt exception propagation while walking the stack when an exception is thrown, so we add a simple helper function to check this, named bpf_is_subprog, and convert existing JIT implementations to also make use of it. 
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 2 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/bpf.h | 5 +++++ 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 150d1c6543f7..7d4af64e3982 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -288,7 +288,7 @@ static bool is_lsi_offset(int offset, int scale) static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { const struct bpf_prog *prog = ctx->prog; - const bool is_main_prog = prog->aux->func_idx == 0; + const bool is_main_prog = !bpf_is_subprog(prog); const u8 r6 = bpf2a64[BPF_REG_6]; const u8 r7 = bpf2a64[BPF_REG_7]; const u8 r8 = bpf2a64[BPF_REG_8]; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index de2fb12120d2..eeb42e5cd7d6 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -556,7 +556,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt); jit->prologue_plt_ret = jit->prg; - if (fp->aux->func_idx == 0) { + if (!bpf_is_subprog(fp)) { /* Initialize the tail call counter in the main program. 
*/ /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 2846c21d75bf..a0d03503b3cb 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1049,7 +1049,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image emit_prologue(&prog, bpf_prog->aux->stack_depth, bpf_prog_was_classic(bpf_prog), tail_call_reachable, - bpf_prog->aux->func_idx != 0); + bpf_is_subprog(bpf_prog)); push_callee_regs(&prog, callee_regs_used); ilen = prog - temp; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b9e573159432..9171b0b6a590 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3194,4 +3194,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags) return flags; } +static inline bool bpf_is_subprog(const struct bpf_prog *prog) +{ + return prog->aux->func_idx != 0; +} + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From fd5d27b70188379bb441d404c29a0afb111e1753 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:31:59 +0200 Subject: arch/x86: Implement arch_bpf_stack_walk The plumbing for offline unwinding when we throw an exception in programs would require walking the stack, hence introduce a new arch_bpf_stack_walk function. This is provided when the JIT supports exceptions, i.e. bpf_jit_supports_exceptions is true. The arch-specific code is really minimal, hence it should be straightforward to extend this support to other architectures as well, as it reuses the logic of arch_stack_walk, but allowing access to unwind_state data. Once the stack pointer and frame pointer are known for the main subprog during the unwinding, we know the stack layout and location of any callee-saved registers which must be restored before we return back to the kernel. This handling will be added in the subsequent patches. 
Note that while we primarily unwind through BPF frames, which are effectively CONFIG_UNWINDER_FRAME_POINTER, we still need either this or CONFIG_UNWINDER_ORC to be able to unwind through the bpf_throw frame from which we begin walking the stack. We also require both sp and bp (stack and frame pointers) from the unwind_state structure, which are only available when one of these two options is enabled. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-3-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 28 ++++++++++++++++++++++++++++ include/linux/filter.h | 2 ++ kernel/bpf/core.c | 9 +++++++++ 3 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a0d03503b3cb..d0c24b5a6abb 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -16,6 +16,7 @@ #include #include #include +#include static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -2933,3 +2934,30 @@ void bpf_jit_free(struct bpf_prog *prog) bpf_prog_unlock_free(prog); } + +bool bpf_jit_supports_exceptions(void) +{ + /* We unwind through both kernel frames (starting from within bpf_throw + * call) and BPF frames. Therefore we require one of ORC or FP unwinder + * to be enabled to walk kernel frames and reach BPF frames in the stack + * trace.
+ */ + return IS_ENABLED(CONFIG_UNWINDER_ORC) || IS_ENABLED(CONFIG_UNWINDER_FRAME_POINTER); +} + +void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie) +{ +#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) + struct unwind_state state; + unsigned long addr; + + for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state); + unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || !consume_fn(cookie, (u64)addr, (u64)state.sp, (u64)state.bp)) + break; + } + return; +#endif + WARN(1, "verification of programs using bpf_throw should have failed\n"); +} diff --git a/include/linux/filter.h b/include/linux/filter.h index 0138832ad571..88874de974cb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -954,6 +954,8 @@ bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); +bool bpf_jit_supports_exceptions(void); +void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 95599df82ee4..c4ac084f2767 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2914,6 +2914,15 @@ int __weak bpf_arch_text_invalidate(void *dst, size_t len) return -ENOTSUPP; } +bool __weak bpf_jit_supports_exceptions(void) +{ + return false; +} + +void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie) +{ +} + #ifdef CONFIG_BPF_SYSCALL static int __init bpf_global_ma_init(void) { -- cgit v1.2.3 From 335d1c5b545284d75ef96ee42e461eacefe865bb Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:32:00 +0200 Subject: bpf: Implement support for adding hidden subprogs Introduce support in the verifier for 
generating a subprogram and including it as part of a BPF program dynamically after the do_check phase is complete. The first user will be the next patch which generates default exception callbacks if none are set for the program. The phase of invocation will be do_misc_fixups. Note that this is an internal verifier function, and should be used with instruction blocks which uphold the invariants stated in check_subprogs. Since these subprogs are always appended to the end of the instruction sequence of the program, it becomes relatively inexpensive to do the related adjustments to the subprog_info of the program. Only the fake exit subprogram is shifted forward, making room for our new subprog. This is useful to insert a new subprogram, get it JITed, and obtain its function pointer. The next patch will use this functionality to insert a default exception callback which will be invoked after unwinding the stack. Note that these added subprograms are invisible to userspace, and never reported in BPF_OBJ_GET_INFO_BY_ID etc. For now, only a single subprogram is supported, but more can be easily supported in the future. To this end, two function counts are introduced now, the existing func_cnt, and real_func_cnt, the latter including hidden programs. This allows us to convert the JIT code to use the real_func_cnt for management of resources while the syscall path continues working with the existing func_cnt.
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/linux/bpf_verifier.h | 3 ++- kernel/bpf/core.c | 12 ++++++------ kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 36 +++++++++++++++++++++++++++++++++--- 5 files changed, 43 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9171b0b6a590..c3667e95af59 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1389,6 +1389,7 @@ struct bpf_prog_aux { u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ + u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a3236651ec64..3c2a8636ab29 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -588,6 +588,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + u32 hidden_subprog_cnt; /* number of hidden subprogs */ bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; @@ -598,7 +599,7 @@ struct bpf_verifier_env { struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; - struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */ union { struct bpf_idmap idmap_scratch; struct bpf_idset idset_scratch; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c4ac084f2767..840ba952702d 100644 --- 
a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -212,7 +212,7 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, const struct bpf_line_info *linfo; void **jited_linfo; - if (!prog->aux->jited_linfo) + if (!prog->aux->jited_linfo || prog->aux->func_idx > prog->aux->func_cnt) /* Userspace did not provide linfo */ return; @@ -539,7 +539,7 @@ static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) { int i; - for (i = 0; i < fp->aux->func_cnt; i++) + for (i = 0; i < fp->aux->real_func_cnt; i++) bpf_prog_kallsyms_del(fp->aux->func[i]); } @@ -589,7 +589,7 @@ bpf_prog_ksym_set_name(struct bpf_prog *prog) sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); /* prog->aux->name will be ignored if full btf name is available */ - if (prog->aux->func_info_cnt) { + if (prog->aux->func_info_cnt && prog->aux->func_idx < prog->aux->func_info_cnt) { type = btf_type_by_id(prog->aux->btf, prog->aux->func_info[prog->aux->func_idx].type_id); func_name = btf_name_by_offset(prog->aux->btf, type->name_off); @@ -1208,7 +1208,7 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, if (!extra_pass) addr = NULL; else if (prog->aux->func && - off >= 0 && off < prog->aux->func_cnt) + off >= 0 && off < prog->aux->real_func_cnt) addr = (u8 *)prog->aux->func[off]->bpf_func; else return -EINVAL; @@ -2721,7 +2721,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) #endif if (aux->dst_trampoline) bpf_trampoline_put(aux->dst_trampoline); - for (i = 0; i < aux->func_cnt; i++) { + for (i = 0; i < aux->real_func_cnt; i++) { /* We can just unlink the subprog poke descriptor table as * it was originally linked to the main program and is also * released along with it. 
@@ -2729,7 +2729,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) aux->func[i]->aux->poke_tab = NULL; bpf_jit_free(aux->func[i]); } - if (aux->func_cnt) { + if (aux->real_func_cnt) { kfree(aux->func); bpf_prog_unlock_free(aux->prog); } else { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6a692f3bea15..85c1d908f70f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2749,7 +2749,7 @@ free_used_maps: * period before we can tear down JIT memory since symbols * are already exposed under kallsyms. */ - __bpf_prog_put_noref(prog, prog->aux->func_cnt); + __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); return err; free_prog_sec: free_uid(prog->aux->user); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 18e673c0ac15..39548e326d53 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15210,7 +15210,8 @@ static void adjust_btf_func(struct bpf_verifier_env *env) if (!aux->func_info) return; - for (i = 0; i < env->subprog_cnt; i++) + /* func_info is not available for hidden subprogs */ + for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++) aux->func_info[i].insn_off = env->subprog_info[i].start; } @@ -18151,7 +18152,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) * the call instruction, as an index for this list */ func[i]->aux->func = func; - func[i]->aux->func_cnt = env->subprog_cnt; + func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt; + func[i]->aux->real_func_cnt = env->subprog_cnt; } for (i = 0; i < env->subprog_cnt; i++) { old_bpf_func = func[i]->bpf_func; @@ -18197,7 +18199,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) prog->aux->extable = func[0]->aux->extable; prog->aux->num_exentries = func[0]->aux->num_exentries; prog->aux->func = func; - prog->aux->func_cnt = env->subprog_cnt; + prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt; + prog->aux->real_func_cnt = env->subprog_cnt; bpf_prog_jit_attempt_done(prog); return 0; 
out_free: @@ -18433,6 +18436,33 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return 0; } +/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */ +static __maybe_unused int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len) +{ + struct bpf_subprog_info *info = env->subprog_info; + int cnt = env->subprog_cnt; + struct bpf_prog *prog; + + /* We only reserve one slot for hidden subprogs in subprog_info. */ + if (env->hidden_subprog_cnt) { + verbose(env, "verifier internal error: only one hidden subprog supported\n"); + return -EFAULT; + } + /* We're not patching any existing instruction, just appending the new + * ones for the hidden subprog. Hence all of the adjustment operations + * in bpf_patch_insn_data are no-ops. + */ + prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len); + if (!prog) + return -ENOMEM; + env->prog = prog; + info[cnt + 1].start = info[cnt].start; + info[cnt].start = prog->len - len + 1; + env->subprog_cnt++; + env->hidden_subprog_cnt++; + return 0; +} + /* Do various post-verification rewrites in a single program pass. * These rewrites simplify JIT and interpreter implementations. */ -- cgit v1.2.3 From f18b03fabaa9b7c80e80b72a621f481f0d706ae0 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:32:01 +0200 Subject: bpf: Implement BPF exceptions This patch implements BPF exceptions, and introduces a bpf_throw kfunc to allow programs to throw exceptions during their execution at runtime. A bpf_throw invocation is treated as an immediate termination of the program, returning back to its caller within the kernel, unwinding all stack frames. This allows the program to simplify its implementation, by testing for runtime conditions which the verifier has no visibility into, and assert that they are true. In case they are not, the program can simply throw an exception from the other branch. 
BPF exceptions are explicitly *NOT* an unlikely slowpath error handling primitive, and this objective has guided design choices of the implementation of them within the kernel (with the bulk of the cost for unwinding the stack offloaded to the bpf_throw kfunc). The implementation of this mechanism requires use of the add_hidden_subprog mechanism introduced in the previous patch, which generates a couple of instructions to move R1 to R0 and exit. The JIT then rewrites the prologue of this subprog to take the stack pointer and frame pointer as inputs and reset the stack frame, popping all callee-saved registers saved by the main subprog. The bpf_throw function then walks the stack at runtime, and invokes this exception subprog with the stack and frame pointers as parameters. Reviewers must take note that currently the main program is made to save all callee-saved registers on x86_64 during entry into the program. This is because we must do an equivalent of a lightweight context switch when unwinding the stack, therefore we need the callee-saved registers of the caller of the BPF program to be able to return with a sane state. Note that we have to additionally handle r12, even though it is not used by the program, because when throwing the exception the program makes an entry into the kernel which could clobber r12 after saving it on the stack. To be able to preserve the value we received on program entry, we push r12 and restore it from the generated subprogram when unwinding the stack. For now, bpf_throw invocation fails when lingering resources or locks exist in that path of the program. In a future followup, bpf_throw will be extended to perform frame-by-frame unwinding to release lingering resources for each stack frame, removing this limitation.
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 89 ++++++++++++++++--- include/linux/bpf.h | 3 + include/linux/bpf_verifier.h | 4 + include/linux/filter.h | 6 ++ kernel/bpf/core.c | 2 +- kernel/bpf/helpers.c | 38 ++++++++ kernel/bpf/verifier.c | 116 ++++++++++++++++++++++--- tools/testing/selftests/bpf/bpf_experimental.h | 16 ++++ 8 files changed, 247 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d0c24b5a6abb..84005f2114e0 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -18,6 +18,8 @@ #include #include +static bool all_callee_regs_used[4] = {true, true, true, true}; + static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { if (len == 1) @@ -256,6 +258,14 @@ struct jit_context { /* Number of bytes that will be skipped on tailcall */ #define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE) +static void push_r12(u8 **pprog) +{ + u8 *prog = *pprog; + + EMIT2(0x41, 0x54); /* push r12 */ + *pprog = prog; +} + static void push_callee_regs(u8 **pprog, bool *callee_regs_used) { u8 *prog = *pprog; @@ -271,6 +281,14 @@ static void push_callee_regs(u8 **pprog, bool *callee_regs_used) *pprog = prog; } +static void pop_r12(u8 **pprog) +{ + u8 *prog = *pprog; + + EMIT2(0x41, 0x5C); /* pop r12 */ + *pprog = prog; +} + static void pop_callee_regs(u8 **pprog, bool *callee_regs_used) { u8 *prog = *pprog; @@ -292,7 +310,8 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used) * while jumping to another program */ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, - bool tail_call_reachable, bool is_subprog) + bool tail_call_reachable, bool is_subprog, + bool is_exception_cb) { u8 *prog = *pprog; @@ -312,8 +331,22 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, /* Keep 
the same instruction layout. */ EMIT2(0x66, 0x90); /* nop2 */ } - EMIT1(0x55); /* push rbp */ - EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + /* Exception callback receives FP as third parameter */ + if (is_exception_cb) { + EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */ + EMIT3(0x48, 0x89, 0xD5); /* mov rbp, rdx */ + /* The main frame must have exception_boundary as true, so we + * first restore those callee-saved regs from stack, before + * reusing the stack frame. + */ + pop_callee_regs(&prog, all_callee_regs_used); + pop_r12(&prog); + /* Reset the stack frame. */ + EMIT3(0x48, 0x89, 0xEC); /* mov rsp, rbp */ + } else { + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + } /* X86_TAIL_CALL_OFFSET is here */ EMIT_ENDBR(); @@ -472,7 +505,8 @@ static void emit_return(u8 **pprog, u8 *ip) * goto *(prog->bpf_func + prologue_size); * out: */ -static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, +static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog, + u8 **pprog, bool *callee_regs_used, u32 stack_depth, u8 *ip, struct jit_context *ctx) { @@ -522,7 +556,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, offset = ctx->tail_call_indirect_label - (prog + 2 - start); EMIT2(X86_JE, offset); /* je out */ - pop_callee_regs(&prog, callee_regs_used); + if (bpf_prog->aux->exception_boundary) { + pop_callee_regs(&prog, all_callee_regs_used); + pop_r12(&prog); + } else { + pop_callee_regs(&prog, callee_regs_used); + } EMIT1(0x58); /* pop rax */ if (stack_depth) @@ -546,7 +585,8 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, *pprog = prog; } -static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, +static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog, + struct bpf_jit_poke_descriptor *poke, u8 **pprog, u8 *ip, bool *callee_regs_used, u32 stack_depth, struct jit_context *ctx) @@ -575,7 +615,13 @@ static void 
emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, poke->tailcall_bypass); - pop_callee_regs(&prog, callee_regs_used); + if (bpf_prog->aux->exception_boundary) { + pop_callee_regs(&prog, all_callee_regs_used); + pop_r12(&prog); + } else { + pop_callee_regs(&prog, callee_regs_used); + } + EMIT1(0x58); /* pop rax */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); @@ -1050,8 +1096,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image emit_prologue(&prog, bpf_prog->aux->stack_depth, bpf_prog_was_classic(bpf_prog), tail_call_reachable, - bpf_is_subprog(bpf_prog)); - push_callee_regs(&prog, callee_regs_used); + bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb); + /* Exception callback will clobber callee regs for its own use, and + * restore the original callee regs from main prog's stack frame. + */ + if (bpf_prog->aux->exception_boundary) { + /* We also need to save r12, which is not mapped to any BPF + * register, as we throw after entry into the kernel, which may + * overwrite r12. 
+ */ + push_r12(&prog); + push_callee_regs(&prog, all_callee_regs_used); + } else { + push_callee_regs(&prog, callee_regs_used); + } ilen = prog - temp; if (rw_image) @@ -1648,13 +1706,15 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_TAIL_CALL: if (imm32) - emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], + emit_bpf_tail_call_direct(bpf_prog, + &bpf_prog->aux->poke_tab[imm32 - 1], &prog, image + addrs[i - 1], callee_regs_used, bpf_prog->aux->stack_depth, ctx); else - emit_bpf_tail_call_indirect(&prog, + emit_bpf_tail_call_indirect(bpf_prog, + &prog, callee_regs_used, bpf_prog->aux->stack_depth, image + addrs[i - 1], @@ -1907,7 +1967,12 @@ emit_jmp: seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; - pop_callee_regs(&prog, callee_regs_used); + if (bpf_prog->aux->exception_boundary) { + pop_callee_regs(&prog, all_callee_regs_used); + pop_r12(&prog); + } else { + pop_callee_regs(&prog, callee_regs_used); + } EMIT1(0xC9); /* leave */ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c3667e95af59..16740ee82082 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1410,6 +1410,8 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool exception_cb; + bool exception_boundary; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1432,6 +1434,7 @@ struct bpf_prog_aux { int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; + unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp); #ifdef CONFIG_SECURITY void *security; #endif diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3c2a8636ab29..da21a3ec5027 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -541,7 +541,9 @@ struct 
bpf_subprog_info { bool has_tail_call; bool tail_call_reachable; bool has_ld_abs; + bool is_cb; bool is_async_cb; + bool is_exception_cb; }; struct bpf_verifier_env; @@ -589,6 +591,7 @@ struct bpf_verifier_env { u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ u32 hidden_subprog_cnt; /* number of hidden subprogs */ + int exception_callback_subprog; bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; @@ -596,6 +599,7 @@ struct bpf_verifier_env { bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; + bool seen_exception; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; diff --git a/include/linux/filter.h b/include/linux/filter.h index 88874de974cb..27406aee2d40 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1171,6 +1171,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); +struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, @@ -1238,6 +1239,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, return -ERANGE; } +static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) +{ + return NULL; +} + static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 840ba952702d..7849b9cca749 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -733,7 +733,7 @@ bool is_bpf_text_address(unsigned long addr) return ret; } -static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) +struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) { struct bpf_ksym *ksym = 
bpf_ksym_find(addr); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index b0a9834f1051..78e8f4de6750 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2449,6 +2449,43 @@ __bpf_kfunc void bpf_rcu_read_unlock(void) rcu_read_unlock(); } +struct bpf_throw_ctx { + struct bpf_prog_aux *aux; + u64 sp; + u64 bp; + int cnt; +}; + +static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp) +{ + struct bpf_throw_ctx *ctx = cookie; + struct bpf_prog *prog; + + if (!is_bpf_text_address(ip)) + return !ctx->cnt; + prog = bpf_prog_ksym_find(ip); + ctx->cnt++; + if (bpf_is_subprog(prog)) + return true; + ctx->aux = prog->aux; + ctx->sp = sp; + ctx->bp = bp; + return false; +} + +__bpf_kfunc void bpf_throw(u64 cookie) +{ + struct bpf_throw_ctx ctx = {}; + + arch_bpf_stack_walk(bpf_stack_walker, &ctx); + WARN_ON_ONCE(!ctx.aux); + if (ctx.aux) + WARN_ON_ONCE(!ctx.aux->exception_boundary); + WARN_ON_ONCE(!ctx.bp); + WARN_ON_ONCE(!ctx.cnt); + ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp); +} + __diag_pop(); BTF_SET8_START(generic_btf_ids) @@ -2478,6 +2515,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_throw) BTF_SET8_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 39548e326d53..9baa6f187b38 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -543,6 +543,7 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) } static bool is_callback_calling_kfunc(u32 btf_id); +static bool is_bpf_throw_kfunc(struct bpf_insn *insn); static bool is_callback_calling_function(enum bpf_func_id func_id) { @@ -1748,7 +1749,9 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt; - /* if dst has more stack 
frames then src frame, free them */ + /* if dst has more stack frames then src frame, free them, this is also + * necessary in case of exceptional exits using bpf_throw. + */ for (i = src->curframe + 1; i <= dst_state->curframe; i++) { free_func_state(dst_state->frame[i]); dst_state->frame[i] = NULL; @@ -2868,7 +2871,7 @@ next: if (i == subprog_end - 1) { /* to avoid fall-through from one subprog into another * the last insn of the subprog should be either exit - * or unconditional jump back + * or unconditional jump back or bpf_throw call */ if (code != (BPF_JMP | BPF_EXIT) && code != (BPF_JMP32 | BPF_JA) && @@ -5661,6 +5664,27 @@ continue_func: for (; i < subprog_end; i++) { int next_insn, sidx; + if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) { + bool err = false; + + if (!is_bpf_throw_kfunc(insn + i)) + continue; + if (subprog[idx].is_cb) + err = true; + for (int c = 0; c < frame && !err; c++) { + if (subprog[ret_prog[c]].is_cb) { + err = true; + break; + } + } + if (!err) + continue; + verbose(env, + "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n", + i, idx); + return -EINVAL; + } + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ @@ -8919,6 +8943,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn * callbacks */ if (set_callee_state_cb != set_callee_state) { + env->subprog_info[subprog].is_cb = true; if (bpf_pseudo_kfunc_call(insn) && !is_callback_calling_kfunc(insn->imm)) { verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", @@ -9308,7 +9333,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) verbose(env, "to caller at %d:\n", *insn_idx); print_verifier_state(env, caller, true); } - /* clear everything in the callee */ + /* clear everything in the callee. In case of exceptional exits using + * bpf_throw, this will be done by copy_verifier_state for extra frames. 
*/ free_func_state(callee); state->frame[state->curframe--] = NULL; return 0; @@ -9432,17 +9458,17 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, return 0; } -static int check_reference_leak(struct bpf_verifier_env *env) +static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit) { struct bpf_func_state *state = cur_func(env); bool refs_lingering = false; int i; - if (state->frameno && !state->in_callback_fn) + if (!exception_exit && state->frameno && !state->in_callback_fn) return 0; for (i = 0; i < state->acquired_refs; i++) { - if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) + if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno) continue; verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", state->refs[i].id, state->refs[i].insn_idx); @@ -9697,7 +9723,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn switch (func_id) { case BPF_FUNC_tail_call: - err = check_reference_leak(env); + err = check_reference_leak(env, false); if (err) { verbose(env, "tail_call would lead to reference leak\n"); return err; @@ -10332,6 +10358,7 @@ enum special_kfunc_type { KF_bpf_dynptr_clone, KF_bpf_percpu_obj_new_impl, KF_bpf_percpu_obj_drop_impl, + KF_bpf_throw, }; BTF_SET_START(special_kfunc_set) @@ -10354,6 +10381,7 @@ BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) +BTF_ID(func, bpf_throw) BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -10378,6 +10406,7 @@ BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) +BTF_ID(func, bpf_throw) static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -10695,6 +10724,12 @@ static bool is_callback_calling_kfunc(u32 btf_id) return btf_id == 
special_kfunc_list[KF_bpf_rbtree_add_impl]; } +static bool is_bpf_throw_kfunc(struct bpf_insn *insn) +{ + return bpf_pseudo_kfunc_call(insn) && insn->off == 0 && + insn->imm == special_kfunc_list[KF_bpf_throw]; +} + static bool is_rbtree_lock_required_kfunc(u32 btf_id) { return is_bpf_rbtree_api_kfunc(btf_id); @@ -11480,6 +11515,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } + if (meta.func_id == special_kfunc_list[KF_bpf_throw]) { + if (!bpf_jit_supports_exceptions()) { + verbose(env, "JIT does not support calling kfunc %s#%d\n", + func_name, meta.func_id); + return -ENOTSUPP; + } + env->seen_exception = true; + } + for (i = 0; i < CALLER_SAVED_REGS; i++) mark_reg_not_init(env, regs, caller_saved[i]); @@ -14525,7 +14569,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) * gen_ld_abs() may terminate the program at runtime, leading to * reference leak. */ - err = check_reference_leak(env); + err = check_reference_leak(env, false); if (err) { verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n"); return err; @@ -16539,6 +16583,7 @@ static int do_check(struct bpf_verifier_env *env) int prev_insn_idx = -1; for (;;) { + bool exception_exit = false; struct bpf_insn *insn; u8 class; int err; @@ -16753,12 +16798,17 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } } - if (insn->src_reg == BPF_PSEUDO_CALL) + if (insn->src_reg == BPF_PSEUDO_CALL) { err = check_func_call(env, insn, &env->insn_idx); - else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) + } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { err = check_kfunc_call(env, insn, &env->insn_idx); - else + if (!err && is_bpf_throw_kfunc(insn)) { + exception_exit = true; + goto process_bpf_exit_full; + } + } else { err = check_helper_call(env, insn, &env->insn_idx); + } if (err) return err; @@ -16788,7 +16838,7 @@ static int do_check(struct bpf_verifier_env *env) verbose(env, "BPF_EXIT uses reserved 
fields\n"); return -EINVAL; } - +process_bpf_exit_full: if (env->cur_state->active_lock.ptr && !in_rbtree_lock_required_cb(env)) { verbose(env, "bpf_spin_unlock is missing\n"); @@ -16807,10 +16857,23 @@ static int do_check(struct bpf_verifier_env *env) * function, for which reference_state must * match caller reference state when it exits. */ - err = check_reference_leak(env); + err = check_reference_leak(env, exception_exit); if (err) return err; + /* The side effect of the prepare_func_exit + * which is being skipped is that it frees + * bpf_func_state. Typically, process_bpf_exit + * will only be hit with outermost exit. + * copy_verifier_state in pop_stack will handle + * freeing of any extra bpf_func_state left over + * from not processing all nested function + * exits. We also skip return code checks as + * they are not needed for exceptional exits. + */ + if (exception_exit) + goto process_bpf_exit; + if (state->curframe) { /* exit from nested function */ err = prepare_func_exit(env, &env->insn_idx); @@ -18113,6 +18176,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) } func[i]->aux->num_exentries = num_exentries; func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; + func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb; + if (!i) + func[i]->aux->exception_boundary = env->seen_exception; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { err = -ENOTSUPP; @@ -18201,6 +18267,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) prog->aux->func = func; prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt; prog->aux->real_func_cnt = env->subprog_cnt; + prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func; + prog->aux->exception_boundary = func[0]->aux->exception_boundary; bpf_prog_jit_attempt_done(prog); return 0; out_free: @@ -18437,7 +18505,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } /* The function requires 
that first instruction in 'patch' is insnsi[prog->len - 1] */ -static __maybe_unused int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len) +static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len) { struct bpf_subprog_info *info = env->subprog_info; int cnt = env->subprog_cnt; @@ -18481,6 +18549,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env) struct bpf_map *map_ptr; int i, ret, cnt, delta = 0; + if (env->seen_exception && !env->exception_callback_subprog) { + struct bpf_insn patch[] = { + env->prog->insnsi[insn_cnt - 1], + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch)); + if (ret < 0) + return ret; + prog = env->prog; + insn = prog->insnsi; + + env->exception_callback_subprog = env->subprog_cnt - 1; + /* Don't update insn_cnt, as add_hidden_subprog always appends insns */ + env->subprog_info[env->exception_callback_subprog].is_cb = true; + env->subprog_info[env->exception_callback_subprog].is_async_cb = true; + env->subprog_info[env->exception_callback_subprog].is_exception_cb = true; + } + for (i = 0; i < insn_cnt; i++, insn++) { /* Make divide-by-zero exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 4494eaa9937e..333b54a86e3a 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -162,4 +162,20 @@ extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym; /* Convenience macro to wrap over bpf_obj_drop_impl */ #define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL) +/* Description + * Throw a BPF exception from the program, immediately terminating its + * execution and unwinding the stack. The supplied 'cookie' parameter + * will be the return value of the program when an exception is thrown. 
+ * + * Note that throwing an exception with lingering resources (locks, + * references, etc.) will lead to a verification error. + * + * Note that callbacks *cannot* call this helper. + * Returns + * Never. + * Throws + * An exception with the specified 'cookie' value. + */ +extern void bpf_throw(u64 cookie) __ksym; + #endif -- cgit v1.2.3 From b9ae0c9dd0aca79bffc17be51c2dc148d1f72708 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:32:03 +0200 Subject: bpf: Add support for custom exception callbacks By default, the subprog generated by the verifier to handle a thrown exception hardcodes a return value of 0. To allow user-defined logic and modification of the return value when an exception is thrown, introduce the 'exception_callback:' declaration tag, which marks a callback as the default exception handler for the program. The format of the declaration tag is 'exception_callback:<value>', where <value> is the name of the exception callback. Each main program can be tagged using this BTF declaration tag to associate it with an exception callback. In case the tag is absent, the default callback is used. As such, the exception callback cannot be modified at runtime, only set during verification. Allowing modification of the callback for the current program execution at runtime leads to issues when the programs begin to nest, as any per-CPU state maintaining this information will have to be saved and restored. We don't want it to stay in bpf_prog_aux as this takes a global effect for all programs. An alternative solution is spilling the callback pointer at a known location on the program stack on entry, and then passing this location to bpf_throw as a parameter. However, since exceptions are geared more towards a use case where they are ideally never invoked, optimizing for this use case and adding to the complexity has diminishing returns.
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 +- include/linux/bpf_verifier.h | 1 + kernel/bpf/btf.c | 29 +++++-- kernel/bpf/verifier.c | 113 +++++++++++++++++++++++-- tools/testing/selftests/bpf/bpf_experimental.h | 31 ++++++- 5 files changed, 160 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 16740ee82082..30063a760b5a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2422,9 +2422,11 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg); + struct bpf_reg_state *reg, bool is_ex_cb); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); +const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index da21a3ec5027..94ec766432f5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -300,6 +300,7 @@ struct bpf_func_state { bool in_callback_fn; struct tnum callback_ret_range; bool in_async_callback_fn; + bool in_exception_callback_fn; /* The following fields should be last. 
See copy_func_state() */ int acquired_refs; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 187b57276fec..f93e835d90af 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3310,10 +3310,10 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, return BTF_FIELD_FOUND; } -static const char *btf_find_decl_tag_value(const struct btf *btf, - const struct btf_type *pt, - int comp_idx, const char *tag_key) +const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key) { + const char *value = NULL; int i; for (i = 1; i < btf_nr_types(btf); i++) { @@ -3327,9 +3327,14 @@ static const char *btf_find_decl_tag_value(const struct btf *btf, continue; if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len)) continue; - return __btf_name_by_offset(btf, t->name_off) + len; + /* Prevent duplicate entries for same type */ + if (value) + return ERR_PTR(-EEXIST); + value = __btf_name_by_offset(btf, t->name_off) + len; } - return NULL; + if (!value) + return ERR_PTR(-ENOENT); + return value; } static int @@ -3347,7 +3352,7 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt, if (t->size != sz) return BTF_FIELD_IGNORE; value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:"); - if (!value_type) + if (IS_ERR(value_type)) return -EINVAL; node_field_name = strstr(value_type, ":"); if (!node_field_name) @@ -6954,7 +6959,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, * (either PTR_TO_CTX or SCALAR_VALUE). 
*/ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs) + struct bpf_reg_state *regs, bool is_ex_cb) { struct bpf_verifier_log *log = &env->log; struct bpf_prog *prog = env->prog; @@ -7011,7 +7016,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, tname, nargs, MAX_BPF_FUNC_REG_ARGS); return -EINVAL; } - /* check that function returns int */ + /* check that function returns int, exception cb also requires this */ t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); @@ -7060,6 +7065,14 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, i, btf_type_str(t), tname); return -EINVAL; } + /* We have already ensured that the callback returns an integer, just + * like all global subprogs. We need to determine it only has a single + * scalar argument. + */ + if (is_ex_cb && (nargs != 1 || regs[BPF_REG_1].type != SCALAR_VALUE)) { + bpf_log(log, "exception cb only supports single integer argument\n"); + return -EINVAL; + } return 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ec767ae08c2b..ec3f22312516 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2457,6 +2457,68 @@ static int add_subprog(struct bpf_verifier_env *env, int off) return env->subprog_cnt - 1; } +static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env) +{ + struct bpf_prog_aux *aux = env->prog->aux; + struct btf *btf = aux->btf; + const struct btf_type *t; + u32 main_btf_id, id; + const char *name; + int ret, i; + + /* Non-zero func_info_cnt implies valid btf */ + if (!aux->func_info_cnt) + return 0; + main_btf_id = aux->func_info[0].type_id; + + t = btf_type_by_id(btf, main_btf_id); + if (!t) { + verbose(env, "invalid btf id for main subprog in func_info\n"); + return -EINVAL; + } + + name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:"); + if (IS_ERR(name)) { + ret = PTR_ERR(name); + /* If there is no 
tag present, there is no exception callback */ + if (ret == -ENOENT) + ret = 0; + else if (ret == -EEXIST) + verbose(env, "multiple exception callback tags for main subprog\n"); + return ret; + } + + ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC); + if (ret < 0) { + verbose(env, "exception callback '%s' could not be found in BTF\n", name); + return ret; + } + id = ret; + t = btf_type_by_id(btf, id); + if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) { + verbose(env, "exception callback '%s' must have global linkage\n", name); + return -EINVAL; + } + ret = 0; + for (i = 0; i < aux->func_info_cnt; i++) { + if (aux->func_info[i].type_id != id) + continue; + ret = aux->func_info[i].insn_off; + /* Further func_info and subprog checks will also happen + * later, so assume this is the right insn_off for now. + */ + if (!ret) { + verbose(env, "invalid exception callback insn_off in func_info: 0\n"); + ret = -EINVAL; + } + } + if (!ret) { + verbose(env, "exception callback type id not found in func_info\n"); + ret = -EINVAL; + } + return ret; +} + #define MAX_KFUNC_DESCS 256 #define MAX_KFUNC_BTFS 256 @@ -2796,8 +2858,8 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, static int add_subprog_and_kfunc(struct bpf_verifier_env *env) { struct bpf_subprog_info *subprog = env->subprog_info; + int i, ret, insn_cnt = env->prog->len, ex_cb_insn; struct bpf_insn *insn = env->prog->insnsi; - int i, ret, insn_cnt = env->prog->len; /* Add entry function. */ ret = add_subprog(env, 0); @@ -2823,6 +2885,26 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) return ret; } + ret = bpf_find_exception_callback_insn_off(env); + if (ret < 0) + return ret; + ex_cb_insn = ret; + + /* If ex_cb_insn > 0, this means that the main program has a subprog + * marked using BTF decl tag to serve as the exception callback. 
+ */ + if (ex_cb_insn) { + ret = add_subprog(env, ex_cb_insn); + if (ret < 0) + return ret; + for (i = 1; i < env->subprog_cnt; i++) { + if (env->subprog_info[i].start != ex_cb_insn) + continue; + env->exception_callback_subprog = i; + break; + } + } + /* Add a fake 'exit' subprog which could simplify subprog iteration * logic. 'subprog_cnt' should not be increased. */ @@ -5707,6 +5789,10 @@ continue_func: /* async callbacks don't increase bpf prog stack size unless called directly */ if (!bpf_pseudo_call(insn + i)) continue; + if (subprog[sidx].is_exception_cb) { + verbose(env, "insn %d cannot call exception cb directly\n", i); + return -EINVAL; + } } i = next_insn; idx = sidx; @@ -5728,8 +5814,13 @@ continue_func: * tail call counter throughout bpf2bpf calls combined with tailcalls */ if (tail_call_reachable) - for (j = 0; j < frame; j++) + for (j = 0; j < frame; j++) { + if (subprog[ret_prog[j]].is_exception_cb) { + verbose(env, "cannot tail call within exception cb\n"); + return -EINVAL; + } subprog[ret_prog[j]].tail_call_reachable = true; + } if (subprog[0].tail_call_reachable) env->prog->aux->tail_call_reachable = true; @@ -14630,7 +14721,7 @@ static int check_return_code(struct bpf_verifier_env *env) const bool is_subprog = frame->subprogno; /* LSM and struct_ops func-ptr's return type could be "void" */ - if (!is_subprog) { + if (!is_subprog || frame->in_exception_callback_fn) { switch (prog_type) { case BPF_PROG_TYPE_LSM: if (prog->expected_attach_type == BPF_LSM_CGROUP) @@ -14678,7 +14769,7 @@ static int check_return_code(struct bpf_verifier_env *env) return 0; } - if (is_subprog) { + if (is_subprog && !frame->in_exception_callback_fn) { if (reg->type != SCALAR_VALUE) { verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", reg_type_str(env, reg->type)); @@ -19334,7 +19425,7 @@ static void free_states(struct bpf_verifier_env *env) } } -static int do_check_common(struct bpf_verifier_env *env, int subprog) +static int 
do_check_common(struct bpf_verifier_env *env, int subprog, bool is_ex_cb) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); struct bpf_verifier_state *state; @@ -19365,7 +19456,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) regs = state->frame[state->curframe]->regs; if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { - ret = btf_prepare_func_args(env, subprog, regs); + ret = btf_prepare_func_args(env, subprog, regs, is_ex_cb); if (ret) goto out; for (i = BPF_REG_1; i <= BPF_REG_5; i++) { @@ -19381,6 +19472,12 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) regs[i].id = ++env->id_gen; } } + if (is_ex_cb) { + state->frame[0]->in_exception_callback_fn = true; + env->subprog_info[subprog].is_cb = true; + env->subprog_info[subprog].is_async_cb = true; + env->subprog_info[subprog].is_exception_cb = true; + } } else { /* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; @@ -19445,7 +19542,7 @@ static int do_check_subprogs(struct bpf_verifier_env *env) continue; env->insn_idx = env->subprog_info[i].start; WARN_ON_ONCE(env->insn_idx == 0); - ret = do_check_common(env, i); + ret = do_check_common(env, i, env->exception_callback_subprog == i); if (ret) { return ret; } else if (env->log.level & BPF_LOG_LEVEL) { @@ -19462,7 +19559,7 @@ static int do_check_main(struct bpf_verifier_env *env) int ret; env->insn_idx = 0; - ret = do_check_common(env, 0); + ret = do_check_common(env, 0, false); if (!ret) env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; return ret; diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 333b54a86e3a..9a87170524ce 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -165,7 +165,16 @@ extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym; /* Description * Throw a BPF exception from the program, immediately terminating its * execution 
and unwinding the stack. The supplied 'cookie' parameter - * will be the return value of the program when an exception is thrown. + * will be the return value of the program when an exception is thrown, + * and the default exception callback is used. Otherwise, if an exception + * callback is set using the '__exception_cb(callback)' declaration tag + * on the main program, the 'cookie' parameter will be the callback's only + * input argument. + * + * Thus, in case of default exception callback, 'cookie' is subjected to + * constraints on the program's return value (as with R0 on exit). + * Otherwise, the return value of the marked exception callback will be + * subjected to the same checks. * * Note that throwing an exception with lingering resources (locks, * references, etc.) will lead to a verification error. @@ -178,4 +187,24 @@ extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym; */ extern void bpf_throw(u64 cookie) __ksym; +/* This macro must be used to mark the exception callback corresponding to the + * main program. For example: + * + * int exception_cb(u64 cookie) { + * return cookie; + * } + * + * SEC("tc") + * __exception_cb(exception_cb) + * int main_prog(struct __sk_buff *ctx) { + * ... + * return TC_ACT_OK; + * } + * + * Here, exception callback for the main program will be 'exception_cb'. Note + * that this attribute can only be used once, and multiple exception callbacks + * specified for the main program will lead to verification error. 
+ */ +#define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name))) + #endif -- cgit v1.2.3 From 7ccb84f04cda1dd6f64f352e9795db308e9cdc0c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:32:06 +0200 Subject: mm: kasan: Declare kasan_unpoison_task_stack_below in kasan.h We require access to this kasan helper in BPF code in the next patch where we have to unpoison the task stack when we unwind and reset the stack frame from bpf_throw, and it never really unpoisons the poisoned stack slots on entry when compiler instrumentation is generated by CONFIG_KASAN_STACK and inline instrumentation is supported. Also, remove the declaration from mm/kasan/kasan.h as we put it in the header file kasan.h. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vincenzo Frascino Suggested-by: Andrey Konovalov Signed-off-by: Kumar Kartikeya Dwivedi Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20230912233214.1518551-10-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/kasan.h | 2 ++ mm/kasan/kasan.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 819b6bc8ac08..7a463f814db2 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -283,8 +283,10 @@ static inline bool kasan_check_byte(const void *address) #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); +asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +static inline void kasan_unpoison_task_stack_below(const void *watermark) {} #endif #ifdef CONFIG_KASAN_GENERIC diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 2e973b36fe07..5eefe202bb8f 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -558,7 +558,6 @@ void 
kasan_restore_multi_shot(bool enabled); * code. Declared here to avoid warnings about missing declarations. */ -asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); void __asan_register_globals(void *globals, ssize_t size); void __asan_unregister_globals(void *globals, ssize_t size); void __asan_handle_no_return(void); -- cgit v1.2.3 From 12e94aee074ce1c5ffdb8f2246a8c4a095b6aa8a Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 13 Sep 2023 15:39:00 +0200 Subject: power: supply: core: Don't export power_supply_notifier power_supply_notifier can be internal, since all users are going through power_supply_reg_notifier()/power_supply_unreg_notifier(). Link: https://lore.kernel.org/r/20230913133900.591637-1-sebastian.reichel@collabora.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 3 +-- include/linux/power_supply.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 6e30674be366..73265001dd4b 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -29,8 +29,7 @@ struct class *power_supply_class; EXPORT_SYMBOL_GPL(power_supply_class); -BLOCKING_NOTIFIER_HEAD(power_supply_notifier); -EXPORT_SYMBOL_GPL(power_supply_notifier); +static BLOCKING_NOTIFIER_HEAD(power_supply_notifier); static struct device_type power_supply_dev_type; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 85b86768c0b9..c0992a77feea 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -767,7 +767,6 @@ struct power_supply_battery_info { int bti_resistance_tolerance; }; -extern struct blocking_notifier_head power_supply_notifier; extern int power_supply_reg_notifier(struct notifier_block *nb); extern void power_supply_unreg_notifier(struct notifier_block *nb); #if IS_ENABLED(CONFIG_POWER_SUPPLY) -- cgit 
v1.2.3 From 9431063ad323ac864750aeba4d304389bc42ca4e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 13 Sep 2023 21:49:37 +0100 Subject: dpll: core: Add DPLL framework base functions DPLL framework is used to represent and configure DPLL devices in systems. Each device that has DPLL and can configure inputs and outputs can use this framework. Implement core framework functions for further interactions with device drivers implementing dpll subsystem, as well as for interactions of DPLL netlink framework part with the subsystem itself. Co-developed-by: Milena Olech Signed-off-by: Milena Olech Co-developed-by: Michal Michalik Signed-off-by: Michal Michalik Signed-off-by: Vadim Fedorenko Co-developed-by: Arkadiusz Kubalewski Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- MAINTAINERS | 11 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/dpll/Kconfig | 7 + drivers/dpll/Makefile | 9 + drivers/dpll/dpll_core.c | 789 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/dpll/dpll_core.h | 89 ++++++ include/linux/dpll.h | 133 ++++++++ 8 files changed, 1041 insertions(+) create mode 100644 drivers/dpll/Kconfig create mode 100644 drivers/dpll/Makefile create mode 100644 drivers/dpll/dpll_core.c create mode 100644 drivers/dpll/dpll_core.h create mode 100644 include/linux/dpll.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 9d0027f389e1..c94489dff2fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6333,6 +6333,17 @@ F: Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-drive F: drivers/net/ethernet/freescale/dpaa2/dpaa2-switch* F: drivers/net/ethernet/freescale/dpaa2/dpsw* +DPLL SUBSYSTEM +M: Vadim Fedorenko +M: Arkadiusz Kubalewski +M: Jiri Pirko +L: netdev@vger.kernel.org +S: Supported +F: Documentation/driver-api/dpll.rst +F: drivers/dpll/* +F: include/net/dpll.h +F: include/uapi/linux/dpll.h + DRBD DRIVER M: Philipp Reisner M: Lars Ellenberg diff --git 
a/drivers/Kconfig b/drivers/Kconfig index efb66e25fa2d..8ba3e8b9ad72 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -243,4 +243,6 @@ source "drivers/hte/Kconfig" source "drivers/cdx/Kconfig" +source "drivers/dpll/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 1bec7819a837..722d15be0eb7 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -197,5 +197,6 @@ obj-$(CONFIG_PECI) += peci/ obj-$(CONFIG_HTE) += hte/ obj-$(CONFIG_DRM_ACCEL) += accel/ obj-$(CONFIG_CDX_BUS) += cdx/ +obj-$(CONFIG_DPLL) += dpll/ obj-$(CONFIG_S390) += s390/ diff --git a/drivers/dpll/Kconfig b/drivers/dpll/Kconfig new file mode 100644 index 000000000000..a4cae73f20d3 --- /dev/null +++ b/drivers/dpll/Kconfig @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Generic DPLL drivers configuration +# + +config DPLL + bool diff --git a/drivers/dpll/Makefile b/drivers/dpll/Makefile new file mode 100644 index 000000000000..2e5b27850110 --- /dev/null +++ b/drivers/dpll/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for DPLL drivers. +# + +obj-$(CONFIG_DPLL) += dpll.o +dpll-y += dpll_core.o +dpll-y += dpll_netlink.o +dpll-y += dpll_nl.o diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c new file mode 100644 index 000000000000..6449ba6a383b --- /dev/null +++ b/drivers/dpll/dpll_core.c @@ -0,0 +1,789 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dpll_core.c - DPLL subsystem kernel-space interface implementation. + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel Corporation. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "dpll_core.h" + +/* Mutex lock to protect DPLL subsystem devices and pins */ +DEFINE_MUTEX(dpll_lock); + +DEFINE_XARRAY_FLAGS(dpll_device_xa, XA_FLAGS_ALLOC); +DEFINE_XARRAY_FLAGS(dpll_pin_xa, XA_FLAGS_ALLOC); + +static u32 dpll_xa_id; + +#define ASSERT_DPLL_REGISTERED(d) \ + WARN_ON_ONCE(!xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED)) +#define ASSERT_DPLL_NOT_REGISTERED(d) \ + WARN_ON_ONCE(xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED)) +#define ASSERT_PIN_REGISTERED(p) \ + WARN_ON_ONCE(!xa_get_mark(&dpll_pin_xa, (p)->id, DPLL_REGISTERED)) + +struct dpll_device_registration { + struct list_head list; + const struct dpll_device_ops *ops; + void *priv; +}; + +struct dpll_pin_registration { + struct list_head list; + const struct dpll_pin_ops *ops; + void *priv; +}; + +struct dpll_device *dpll_device_get_by_id(int id) +{ + if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED)) + return xa_load(&dpll_device_xa, id); + + return NULL; +} + +static struct dpll_pin_registration * +dpll_pin_registration_find(struct dpll_pin_ref *ref, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + + list_for_each_entry(reg, &ref->registration_list, list) { + if (reg->ops == ops && reg->priv == priv) + return reg; + } + return NULL; +} + +static int +dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + bool ref_exists = false; + unsigned long i; + int ret; + + xa_for_each(xa_pins, i, ref) { + if (ref->pin != pin) + continue; + reg = dpll_pin_registration_find(ref, ops, priv); + if (reg) { + refcount_inc(&ref->refcount); + return 0; + } + ref_exists = true; + break; + } + + if (!ref_exists) { + ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!ref) + return -ENOMEM; + ref->pin = pin; + 
INIT_LIST_HEAD(&ref->registration_list); + ret = xa_insert(xa_pins, pin->pin_idx, ref, GFP_KERNEL); + if (ret) { + kfree(ref); + return ret; + } + refcount_set(&ref->refcount, 1); + } + + reg = kzalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) { + if (!ref_exists) { + xa_erase(xa_pins, pin->pin_idx); + kfree(ref); + } + return -ENOMEM; + } + reg->ops = ops; + reg->priv = priv; + if (ref_exists) + refcount_inc(&ref->refcount); + list_add_tail(&reg->list, &ref->registration_list); + + return 0; +} + +static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + unsigned long i; + + xa_for_each(xa_pins, i, ref) { + if (ref->pin != pin) + continue; + reg = dpll_pin_registration_find(ref, ops, priv); + if (WARN_ON(!reg)) + return -EINVAL; + if (refcount_dec_and_test(&ref->refcount)) { + list_del(&reg->list); + kfree(reg); + xa_erase(xa_pins, i); + WARN_ON(!list_empty(&ref->registration_list)); + kfree(ref); + } + return 0; + } + + return -EINVAL; +} + +static int +dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + bool ref_exists = false; + unsigned long i; + int ret; + + xa_for_each(xa_dplls, i, ref) { + if (ref->dpll != dpll) + continue; + reg = dpll_pin_registration_find(ref, ops, priv); + if (reg) { + refcount_inc(&ref->refcount); + return 0; + } + ref_exists = true; + break; + } + + if (!ref_exists) { + ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!ref) + return -ENOMEM; + ref->dpll = dpll; + INIT_LIST_HEAD(&ref->registration_list); + ret = xa_insert(xa_dplls, dpll->id, ref, GFP_KERNEL); + if (ret) { + kfree(ref); + return ret; + } + refcount_set(&ref->refcount, 1); + } + + reg = kzalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) { + if (!ref_exists) { + xa_erase(xa_dplls, dpll->id); + kfree(ref); + } + return 
-ENOMEM; + } + reg->ops = ops; + reg->priv = priv; + if (ref_exists) + refcount_inc(&ref->refcount); + list_add_tail(&reg->list, &ref->registration_list); + + return 0; +} + +static void +dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + unsigned long i; + + xa_for_each(xa_dplls, i, ref) { + if (ref->dpll != dpll) + continue; + reg = dpll_pin_registration_find(ref, ops, priv); + if (WARN_ON(!reg)) + return; + if (refcount_dec_and_test(&ref->refcount)) { + list_del(&reg->list); + kfree(reg); + xa_erase(xa_dplls, i); + WARN_ON(!list_empty(&ref->registration_list)); + kfree(ref); + } + return; + } +} + +struct dpll_pin_ref *dpll_xa_ref_dpll_first(struct xarray *xa_refs) +{ + struct dpll_pin_ref *ref; + unsigned long i = 0; + + ref = xa_find(xa_refs, &i, ULONG_MAX, XA_PRESENT); + WARN_ON(!ref); + return ref; +} + +static struct dpll_device * +dpll_device_alloc(const u64 clock_id, u32 device_idx, struct module *module) +{ + struct dpll_device *dpll; + int ret; + + dpll = kzalloc(sizeof(*dpll), GFP_KERNEL); + if (!dpll) + return ERR_PTR(-ENOMEM); + refcount_set(&dpll->refcount, 1); + INIT_LIST_HEAD(&dpll->registration_list); + dpll->device_idx = device_idx; + dpll->clock_id = clock_id; + dpll->module = module; + ret = xa_alloc_cyclic(&dpll_device_xa, &dpll->id, dpll, xa_limit_32b, + &dpll_xa_id, GFP_KERNEL); + if (ret < 0) { + kfree(dpll); + return ERR_PTR(ret); + } + xa_init_flags(&dpll->pin_refs, XA_FLAGS_ALLOC); + + return dpll; +} + +/** + * dpll_device_get - find existing or create new dpll device + * @clock_id: clock_id of creator + * @device_idx: idx given by device driver + * @module: reference to registering module + * + * Get existing object of a dpll device, unique for given arguments. + * Create new if doesn't exist yet. 
+ * + * Context: Acquires a lock (dpll_lock) + * Return: + * * valid dpll_device struct pointer if succeeded + * * ERR_PTR(X) - error + */ +struct dpll_device * +dpll_device_get(u64 clock_id, u32 device_idx, struct module *module) +{ + struct dpll_device *dpll, *ret = NULL; + unsigned long index; + + mutex_lock(&dpll_lock); + xa_for_each(&dpll_device_xa, index, dpll) { + if (dpll->clock_id == clock_id && + dpll->device_idx == device_idx && + dpll->module == module) { + ret = dpll; + refcount_inc(&ret->refcount); + break; + } + } + if (!ret) + ret = dpll_device_alloc(clock_id, device_idx, module); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_device_get); + +/** + * dpll_device_put - decrease the refcount and free memory if possible + * @dpll: dpll_device struct pointer + * + * Context: Acquires a lock (dpll_lock) + * Drop reference for a dpll device, if all references are gone, delete + * dpll device object. + */ +void dpll_device_put(struct dpll_device *dpll) +{ + mutex_lock(&dpll_lock); + if (refcount_dec_and_test(&dpll->refcount)) { + ASSERT_DPLL_NOT_REGISTERED(dpll); + WARN_ON_ONCE(!xa_empty(&dpll->pin_refs)); + xa_destroy(&dpll->pin_refs); + xa_erase(&dpll_device_xa, dpll->id); + WARN_ON(!list_empty(&dpll->registration_list)); + kfree(dpll); + } + mutex_unlock(&dpll_lock); +} +EXPORT_SYMBOL_GPL(dpll_device_put); + +static struct dpll_device_registration * +dpll_device_registration_find(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv) +{ + struct dpll_device_registration *reg; + + list_for_each_entry(reg, &dpll->registration_list, list) { + if (reg->ops == ops && reg->priv == priv) + return reg; + } + return NULL; +} + +/** + * dpll_device_register - register the dpll device in the subsystem + * @dpll: pointer to a dpll + * @type: type of a dpll + * @ops: ops for a dpll device + * @priv: pointer to private information of owner + * + * Make dpll device available for user space. 
+ * + * Context: Acquires a lock (dpll_lock) + * Return: + * * 0 on success + * * negative - error value + */ +int dpll_device_register(struct dpll_device *dpll, enum dpll_type type, + const struct dpll_device_ops *ops, void *priv) +{ + struct dpll_device_registration *reg; + bool first_registration = false; + + if (WARN_ON(!ops)) + return -EINVAL; + if (WARN_ON(!ops->mode_get)) + return -EINVAL; + if (WARN_ON(!ops->lock_status_get)) + return -EINVAL; + if (WARN_ON(type < DPLL_TYPE_PPS || type > DPLL_TYPE_MAX)) + return -EINVAL; + + mutex_lock(&dpll_lock); + reg = dpll_device_registration_find(dpll, ops, priv); + if (reg) { + mutex_unlock(&dpll_lock); + return -EEXIST; + } + + reg = kzalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) { + mutex_unlock(&dpll_lock); + return -ENOMEM; + } + reg->ops = ops; + reg->priv = priv; + dpll->type = type; + first_registration = list_empty(&dpll->registration_list); + list_add_tail(&reg->list, &dpll->registration_list); + if (!first_registration) { + mutex_unlock(&dpll_lock); + return 0; + } + + xa_set_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED); + mutex_unlock(&dpll_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(dpll_device_register); + +/** + * dpll_device_unregister - unregister dpll device + * @dpll: registered dpll pointer + * @ops: ops for a dpll device + * @priv: pointer to private information of owner + * + * Unregister device, make it unavailable for userspace. 
+ * Note: It does not free the memory + * Context: Acquires a lock (dpll_lock) + */ +void dpll_device_unregister(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv) +{ + struct dpll_device_registration *reg; + + mutex_lock(&dpll_lock); + ASSERT_DPLL_REGISTERED(dpll); + reg = dpll_device_registration_find(dpll, ops, priv); + if (WARN_ON(!reg)) { + mutex_unlock(&dpll_lock); + return; + } + list_del(&reg->list); + kfree(reg); + + if (!list_empty(&dpll->registration_list)) { + mutex_unlock(&dpll_lock); + return; + } + xa_clear_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED); + mutex_unlock(&dpll_lock); +} +EXPORT_SYMBOL_GPL(dpll_device_unregister); + +static struct dpll_pin * +dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module, + const struct dpll_pin_properties *prop) +{ + struct dpll_pin *pin; + int ret; + + pin = kzalloc(sizeof(*pin), GFP_KERNEL); + if (!pin) + return ERR_PTR(-ENOMEM); + pin->pin_idx = pin_idx; + pin->clock_id = clock_id; + pin->module = module; + if (WARN_ON(prop->type < DPLL_PIN_TYPE_MUX || + prop->type > DPLL_PIN_TYPE_MAX)) { + ret = -EINVAL; + goto err; + } + pin->prop = prop; + refcount_set(&pin->refcount, 1); + xa_init_flags(&pin->dpll_refs, XA_FLAGS_ALLOC); + xa_init_flags(&pin->parent_refs, XA_FLAGS_ALLOC); + ret = xa_alloc(&dpll_pin_xa, &pin->id, pin, xa_limit_16b, GFP_KERNEL); + if (ret) + goto err; + return pin; +err: + xa_destroy(&pin->dpll_refs); + xa_destroy(&pin->parent_refs); + kfree(pin); + return ERR_PTR(ret); +} + +/** + * dpll_pin_get - find existing or create new dpll pin + * @clock_id: clock_id of creator + * @pin_idx: idx given by dev driver + * @module: reference to registering module + * @prop: dpll pin properties + * + * Get existing object of a pin (unique for given arguments) or create new + * if doesn't exist yet. 
+ * + * Context: Acquires a lock (dpll_lock) + * Return: + * * valid allocated dpll_pin struct pointer if succeeded + * * ERR_PTR(X) - error + */ +struct dpll_pin * +dpll_pin_get(u64 clock_id, u32 pin_idx, struct module *module, + const struct dpll_pin_properties *prop) +{ + struct dpll_pin *pos, *ret = NULL; + unsigned long i; + + mutex_lock(&dpll_lock); + xa_for_each(&dpll_pin_xa, i, pos) { + if (pos->clock_id == clock_id && + pos->pin_idx == pin_idx && + pos->module == module) { + ret = pos; + refcount_inc(&ret->refcount); + break; + } + } + if (!ret) + ret = dpll_pin_alloc(clock_id, pin_idx, module, prop); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_pin_get); + +/** + * dpll_pin_put - decrease the refcount and free memory if possible + * @pin: pointer to a pin to be put + * + * Drop reference for a pin, if all references are gone, delete pin object. + * + * Context: Acquires a lock (dpll_lock) + */ +void dpll_pin_put(struct dpll_pin *pin) +{ + mutex_lock(&dpll_lock); + if (refcount_dec_and_test(&pin->refcount)) { + xa_destroy(&pin->dpll_refs); + xa_destroy(&pin->parent_refs); + xa_erase(&dpll_pin_xa, pin->id); + kfree(pin); + } + mutex_unlock(&dpll_lock); +} +EXPORT_SYMBOL_GPL(dpll_pin_put); + +static int +__dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + int ret; + + ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv); + if (ret) + return ret; + ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv); + if (ret) + goto ref_pin_del; + xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED); + + return ret; + +ref_pin_del: + dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv); + return ret; +} + +/** + * dpll_pin_register - register the dpll pin in the subsystem + * @dpll: pointer to a dpll + * @pin: pointer to a dpll pin + * @ops: ops for a dpll pin ops + * @priv: pointer to private information of owner + * + * Context: Acquires a lock (dpll_lock) + * 
Return: + * * 0 on success + * * negative - error value + */ +int +dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + int ret; + + if (WARN_ON(!ops) || + WARN_ON(!ops->state_on_dpll_get) || + WARN_ON(!ops->direction_get)) + return -EINVAL; + if (ASSERT_DPLL_REGISTERED(dpll)) + return -EINVAL; + + mutex_lock(&dpll_lock); + if (WARN_ON(!(dpll->module == pin->module && + dpll->clock_id == pin->clock_id))) + ret = -EINVAL; + else + ret = __dpll_pin_register(dpll, pin, ops, priv); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_pin_register); + +static void +__dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv); + dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv); + if (xa_empty(&pin->dpll_refs)) + xa_clear_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED); +} + +/** + * dpll_pin_unregister - unregister dpll pin from dpll device + * @dpll: registered dpll pointer + * @pin: pointer to a pin + * @ops: ops for a dpll pin + * @priv: pointer to private information of owner + * + * Note: It does not free the memory + * Context: Acquires a lock (dpll_lock) + */ +void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + if (WARN_ON(xa_empty(&dpll->pin_refs))) + return; + if (WARN_ON(!xa_empty(&pin->parent_refs))) + return; + + mutex_lock(&dpll_lock); + __dpll_pin_unregister(dpll, pin, ops, priv); + mutex_unlock(&dpll_lock); +} +EXPORT_SYMBOL_GPL(dpll_pin_unregister); + +/** + * dpll_pin_on_pin_register - register a pin with a parent pin + * @parent: pointer to a parent pin + * @pin: pointer to a pin + * @ops: ops for a dpll pin + * @priv: pointer to private information of owner + * + * Register a pin with a parent pin, create references between them and + * between newly registered pin and dplls connected 
with a parent pin. + * + * Context: Acquires a lock (dpll_lock) + * Return: + * * 0 on success + * * negative - error value + */ +int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_ref *ref; + unsigned long i, stop; + int ret; + + if (WARN_ON(parent->prop->type != DPLL_PIN_TYPE_MUX)) + return -EINVAL; + + if (WARN_ON(!ops) || + WARN_ON(!ops->state_on_pin_get) || + WARN_ON(!ops->direction_get)) + return -EINVAL; + if (ASSERT_PIN_REGISTERED(parent)) + return -EINVAL; + + mutex_lock(&dpll_lock); + ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv); + if (ret) + goto unlock; + refcount_inc(&pin->refcount); + xa_for_each(&parent->dpll_refs, i, ref) { + ret = __dpll_pin_register(ref->dpll, pin, ops, priv); + if (ret) { + stop = i; + goto dpll_unregister; + } + } + mutex_unlock(&dpll_lock); + + return ret; + +dpll_unregister: + xa_for_each(&parent->dpll_refs, i, ref) + if (i < stop) + __dpll_pin_unregister(ref->dpll, pin, ops, priv); + refcount_dec(&pin->refcount); + dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); +unlock: + mutex_unlock(&dpll_lock); + return ret; +} +EXPORT_SYMBOL_GPL(dpll_pin_on_pin_register); + +/** + * dpll_pin_on_pin_unregister - unregister dpll pin from a parent pin + * @parent: pointer to a parent pin + * @pin: pointer to a pin + * @ops: ops for a dpll pin + * @priv: pointer to private information of owner + * + * Context: Acquires a lock (dpll_lock) + * Note: It does not free the memory + */ +void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_ref *ref; + unsigned long i; + + mutex_lock(&dpll_lock); + dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); + refcount_dec(&pin->refcount); + xa_for_each(&pin->dpll_refs, i, ref) + __dpll_pin_unregister(ref->dpll, pin, ops, priv); + mutex_unlock(&dpll_lock); +} 
+EXPORT_SYMBOL_GPL(dpll_pin_on_pin_unregister); + +static struct dpll_device_registration * +dpll_device_registration_first(struct dpll_device *dpll) +{ + struct dpll_device_registration *reg; + + reg = list_first_entry_or_null((struct list_head *)&dpll->registration_list, + struct dpll_device_registration, list); + WARN_ON(!reg); + return reg; +} + +void *dpll_priv(struct dpll_device *dpll) +{ + struct dpll_device_registration *reg; + + reg = dpll_device_registration_first(dpll); + return reg->priv; +} + +const struct dpll_device_ops *dpll_device_ops(struct dpll_device *dpll) +{ + struct dpll_device_registration *reg; + + reg = dpll_device_registration_first(dpll); + return reg->ops; +} + +static struct dpll_pin_registration * +dpll_pin_registration_first(struct dpll_pin_ref *ref) +{ + struct dpll_pin_registration *reg; + + reg = list_first_entry_or_null(&ref->registration_list, + struct dpll_pin_registration, list); + WARN_ON(!reg); + return reg; +} + +void *dpll_pin_on_dpll_priv(struct dpll_device *dpll, + struct dpll_pin *pin) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + + ref = xa_load(&dpll->pin_refs, pin->pin_idx); + if (!ref) + return NULL; + reg = dpll_pin_registration_first(ref); + return reg->priv; +} + +void *dpll_pin_on_pin_priv(struct dpll_pin *parent, + struct dpll_pin *pin) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + + ref = xa_load(&pin->parent_refs, parent->pin_idx); + if (!ref) + return NULL; + reg = dpll_pin_registration_first(ref); + return reg->priv; +} + +const struct dpll_pin_ops *dpll_pin_ops(struct dpll_pin_ref *ref) +{ + struct dpll_pin_registration *reg; + + reg = dpll_pin_registration_first(ref); + return reg->ops; +} + +static int __init dpll_init(void) +{ + int ret; + + ret = genl_register_family(&dpll_nl_family); + if (ret) + goto error; + + return 0; + +error: + mutex_destroy(&dpll_lock); + return ret; +} + +static void __exit dpll_exit(void) +{ + 
genl_unregister_family(&dpll_nl_family); + mutex_destroy(&dpll_lock); +} + +subsys_initcall(dpll_init); +module_exit(dpll_exit); diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h new file mode 100644 index 000000000000..5585873c5c1b --- /dev/null +++ b/drivers/dpll/dpll_core.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +#ifndef __DPLL_CORE_H__ +#define __DPLL_CORE_H__ + +#include +#include +#include +#include "dpll_nl.h" + +#define DPLL_REGISTERED XA_MARK_1 + +/** + * struct dpll_device - stores DPLL device internal data + * @id: unique id number for device given by dpll subsystem + * @device_idx: id given by dev driver + * @clock_id: unique identifier (clock_id) of a dpll + * @module: module of creator + * @type: type of a dpll + * @pin_refs: stores pins registered within a dpll + * @refcount: refcount + * @registration_list: list of registered ops and priv data of dpll owners + **/ +struct dpll_device { + u32 id; + u32 device_idx; + u64 clock_id; + struct module *module; + enum dpll_type type; + struct xarray pin_refs; + refcount_t refcount; + struct list_head registration_list; +}; + +/** + * struct dpll_pin - structure for a dpll pin + * @id: unique id number for pin given by dpll subsystem + * @pin_idx: index of a pin given by dev driver + * @clock_id: clock_id of creator + * @module: module of creator + * @dpll_refs: hold references to dplls pin was registered with + * @parent_refs: hold references to parent pins pin was registered with + * @prop: pointer to pin properties given by registerer + * @rclk_dev_name: holds name of device when pin can recover clock from it + * @refcount: refcount + **/ +struct dpll_pin { + u32 id; + u32 pin_idx; + u64 clock_id; + struct module *module; + struct xarray dpll_refs; + struct xarray parent_refs; + const struct dpll_pin_properties *prop; + refcount_t refcount; +}; + +/** + * 
struct dpll_pin_ref - structure for referencing either dpll or pins + * @dpll: pointer to a dpll + * @pin: pointer to a pin + * @registration_list: list of ops and priv data registered with the ref + * @refcount: refcount + **/ +struct dpll_pin_ref { + union { + struct dpll_device *dpll; + struct dpll_pin *pin; + }; + struct list_head registration_list; + refcount_t refcount; +}; + +void *dpll_priv(struct dpll_device *dpll); +void *dpll_pin_on_dpll_priv(struct dpll_device *dpll, struct dpll_pin *pin); +void *dpll_pin_on_pin_priv(struct dpll_pin *parent, struct dpll_pin *pin); + +const struct dpll_device_ops *dpll_device_ops(struct dpll_device *dpll); +struct dpll_device *dpll_device_get_by_id(int id); +const struct dpll_pin_ops *dpll_pin_ops(struct dpll_pin_ref *ref); +struct dpll_pin_ref *dpll_xa_ref_dpll_first(struct xarray *xa_refs); +extern struct xarray dpll_device_xa; +extern struct xarray dpll_pin_xa; +extern struct mutex dpll_lock; +#endif diff --git a/include/linux/dpll.h b/include/linux/dpll.h new file mode 100644 index 000000000000..b47c3560b937 --- /dev/null +++ b/include/linux/dpll.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. 
and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +#ifndef __DPLL_H__ +#define __DPLL_H__ + +#include +#include +#include + +struct dpll_device; +struct dpll_pin; + +struct dpll_device_ops { + int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_mode *mode, struct netlink_ext_ack *extack); + bool (*mode_supported)(const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_mode mode, + struct netlink_ext_ack *extack); + int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_lock_status *status, + struct netlink_ext_ack *extack); + int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, + s32 *temp, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_ops { + int (*frequency_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u64 frequency, + struct netlink_ext_ack *extack); + int (*frequency_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 *frequency, struct netlink_ext_ack *extack); + int (*direction_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_pin_direction direction, + struct netlink_ext_ack *extack); + int (*direction_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack); + int (*state_on_pin_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_pin_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void 
*parent_pin_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*prio_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 *prio, struct netlink_ext_ack *extack); + int (*prio_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u32 prio, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_frequency { + u64 min; + u64 max; +}; + +#define DPLL_PIN_FREQUENCY_RANGE(_min, _max) \ + { \ + .min = _min, \ + .max = _max, \ + } + +#define DPLL_PIN_FREQUENCY(_val) DPLL_PIN_FREQUENCY_RANGE(_val, _val) +#define DPLL_PIN_FREQUENCY_1PPS \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_1_HZ) +#define DPLL_PIN_FREQUENCY_10MHZ \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_MHZ) +#define DPLL_PIN_FREQUENCY_IRIG_B \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_KHZ) +#define DPLL_PIN_FREQUENCY_DCF77 \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ) + +struct dpll_pin_properties { + const char *board_label; + const char *panel_label; + const char *package_label; + enum dpll_pin_type type; + unsigned long capabilities; + u32 freq_supported_num; + struct dpll_pin_frequency *freq_supported; +}; + +struct dpll_device * +dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module); + +void dpll_device_put(struct dpll_device *dpll); + +int dpll_device_register(struct dpll_device *dpll, enum dpll_type type, + const struct dpll_device_ops *ops, void *priv); + +void dpll_device_unregister(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv); + +struct dpll_pin * +dpll_pin_get(u64 clock_id, u32 dev_driver_id, struct module *module, + const struct dpll_pin_properties *prop); + +int dpll_pin_register(struct dpll_device *dpll, struct dpll_pin 
*pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_put(struct dpll_pin *pin); + +int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +#endif -- cgit v1.2.3 From 9d71b54b65b1fb6c0d3a6c5c88ba9b915c783fbc Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 13 Sep 2023 21:49:38 +0100 Subject: dpll: netlink: Add DPLL framework base functions DPLL framework is used to represent and configure DPLL devices in systems. Each device that has DPLL and can configure inputs and outputs can use this framework. Implement dpll netlink framework functions for enablement of dpll subsystem netlink family. Co-developed-by: Milena Olech Signed-off-by: Milena Olech Co-developed-by: Michal Michalik Signed-off-by: Michal Michalik Signed-off-by: Vadim Fedorenko Co-developed-by: Arkadiusz Kubalewski Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/dpll/dpll_core.c | 11 +- drivers/dpll/dpll_netlink.c | 1241 +++++++++++++++++++++++++++++++++++++++++++ drivers/dpll/dpll_netlink.h | 13 + include/linux/dpll.h | 4 + 4 files changed, 1268 insertions(+), 1 deletion(-) create mode 100644 drivers/dpll/dpll_netlink.c create mode 100644 drivers/dpll/dpll_netlink.h (limited to 'include/linux') diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 6449ba6a383b..3568149b9562 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -14,6 +14,7 @@ #include #include "dpll_core.h" +#include "dpll_netlink.h" /* Mutex lock to protect DPLL subsystem devices and pins */ DEFINE_MUTEX(dpll_lock); @@ -381,6 +382,7 @@ int dpll_device_register(struct dpll_device *dpll, enum dpll_type type, } xa_set_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED); + dpll_device_create_ntf(dpll); mutex_unlock(&dpll_lock); return 0; @@ -404,6 +406,7 @@ void dpll_device_unregister(struct dpll_device *dpll, mutex_lock(&dpll_lock); ASSERT_DPLL_REGISTERED(dpll); + dpll_device_delete_ntf(dpll); reg = dpll_device_registration_find(dpll, ops, priv); if (WARN_ON(!reg)) { mutex_unlock(&dpll_lock); @@ -528,6 +531,7 @@ __dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, if (ret) goto ref_pin_del; xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED); + dpll_pin_create_ntf(pin); return ret; @@ -602,6 +606,7 @@ void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, return; mutex_lock(&dpll_lock); + dpll_pin_delete_ntf(pin); __dpll_pin_unregister(dpll, pin, ops, priv); mutex_unlock(&dpll_lock); } @@ -650,6 +655,7 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, stop = i; goto dpll_unregister; } + dpll_pin_create_ntf(pin); } mutex_unlock(&dpll_lock); @@ -657,8 +663,10 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, dpll_unregister: xa_for_each(&parent->dpll_refs, i, ref) - if (i < stop) + if (i < stop) { 
__dpll_pin_unregister(ref->dpll, pin, ops, priv); + dpll_pin_delete_ntf(pin); + } refcount_dec(&pin->refcount); dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); unlock: @@ -684,6 +692,7 @@ void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, unsigned long i; mutex_lock(&dpll_lock); + dpll_pin_delete_ntf(pin); dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); refcount_dec(&pin->refcount); xa_for_each(&pin->dpll_refs, i, ref) diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c new file mode 100644 index 000000000000..9464a6865977 --- /dev/null +++ b/drivers/dpll/dpll_netlink.c @@ -0,0 +1,1241 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic netlink for DPLL management framework + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + * + */ +#include +#include +#include +#include "dpll_core.h" +#include "dpll_netlink.h" +#include "dpll_nl.h" +#include + +#define ASSERT_NOT_NULL(ptr) (WARN_ON(!ptr)) + +#define xa_for_each_marked_start(xa, index, entry, filter, start) \ + for (index = start, entry = xa_find(xa, &index, ULONG_MAX, filter); \ + entry; entry = xa_find_after(xa, &index, ULONG_MAX, filter)) + +struct dpll_dump_ctx { + unsigned long idx; +}; + +static struct dpll_dump_ctx *dpll_dump_context(struct netlink_callback *cb) +{ + return (struct dpll_dump_ctx *)cb->ctx; +} + +static int +dpll_msg_add_dev_handle(struct sk_buff *msg, struct dpll_device *dpll) +{ + if (nla_put_u32(msg, DPLL_A_ID, dpll->id)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) +{ + if (nla_put_u32(msg, DPLL_A_PIN_PARENT_ID, id)) + return -EMSGSIZE; + + return 0; +} + +/** + * dpll_msg_add_pin_handle - attach pin handle attribute to a given message + * @msg: pointer to sk_buff message to attach a pin handle + * @pin: pin pointer + * + * Return: + * * 0 - success + * * -EMSGSIZE - no space in message to 
attach pin handle + */ +static int +dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +{ + if (!pin) + return 0; + if (nla_put_u32(msg, DPLL_A_PIN_ID, pin->id)) + return -EMSGSIZE; + return 0; +} + +static int +dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + enum dpll_mode mode; + int ret; + + ret = ops->mode_get(dpll, dpll_priv(dpll), &mode, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_MODE, mode)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_mode_supported(struct sk_buff *msg, struct dpll_device *dpll, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + enum dpll_mode mode; + + if (!ops->mode_supported) + return 0; + for (mode = DPLL_MODE_MANUAL; mode <= DPLL_MODE_MAX; mode++) + if (ops->mode_supported(dpll, dpll_priv(dpll), mode, extack)) + if (nla_put_u32(msg, DPLL_A_MODE_SUPPORTED, mode)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_lock_status(struct sk_buff *msg, struct dpll_device *dpll, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + enum dpll_lock_status status; + int ret; + + ret = ops->lock_status_get(dpll, dpll_priv(dpll), &status, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_LOCK_STATUS, status)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_temp(struct sk_buff *msg, struct dpll_device *dpll, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + s32 temp; + int ret; + + if (!ops->temp_get) + return 0; + ret = ops->temp_get(dpll, dpll_priv(dpll), &temp, extack); + if (ret) + return ret; + if (nla_put_s32(msg, DPLL_A_TEMP, temp)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_pin_prio(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct 
netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + u32 prio; + int ret; + + if (!ops->prio_get) + return 0; + ret = ops->prio_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, + dpll_priv(dpll), &prio, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_PIN_PRIO, prio)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_pin_on_dpll_state(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + enum dpll_pin_state state; + int ret; + + if (!ops->state_on_dpll_get) + return 0; + ret = ops->state_on_dpll_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), &state, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_PIN_STATE, state)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_pin_direction(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + enum dpll_pin_direction direction; + int ret; + + ret = ops->direction_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, + dpll_priv(dpll), &direction, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_PIN_DIRECTION, direction)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + struct nlattr *nest; + int fs, ret; + u64 freq; + + if (!ops->frequency_get) + return 0; + ret = ops->frequency_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, + dpll_priv(dpll), &freq, extack); + if (ret) + return ret; + if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY, sizeof(freq), 
&freq, + DPLL_A_PIN_PAD)) + return -EMSGSIZE; + for (fs = 0; fs < pin->prop->freq_supported_num; fs++) { + nest = nla_nest_start(msg, DPLL_A_PIN_FREQUENCY_SUPPORTED); + if (!nest) + return -EMSGSIZE; + freq = pin->prop->freq_supported[fs].min; + if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MIN, sizeof(freq), + &freq, DPLL_A_PIN_PAD)) { + nla_nest_cancel(msg, nest); + return -EMSGSIZE; + } + freq = pin->prop->freq_supported[fs].max; + if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MAX, sizeof(freq), + &freq, DPLL_A_PIN_PAD)) { + nla_nest_cancel(msg, nest); + return -EMSGSIZE; + } + nla_nest_end(msg, nest); + } + + return 0; +} + +static bool dpll_pin_is_freq_supported(struct dpll_pin *pin, u32 freq) +{ + int fs; + + for (fs = 0; fs < pin->prop->freq_supported_num; fs++) + if (freq >= pin->prop->freq_supported[fs].min && + freq <= pin->prop->freq_supported[fs].max) + return true; + return false; +} + +static int +dpll_msg_add_pin_parents(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *dpll_ref, + struct netlink_ext_ack *extack) +{ + enum dpll_pin_state state; + struct dpll_pin_ref *ref; + struct dpll_pin *ppin; + struct nlattr *nest; + unsigned long index; + int ret; + + xa_for_each(&pin->parent_refs, index, ref) { + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + void *parent_priv; + + ppin = ref->pin; + parent_priv = dpll_pin_on_dpll_priv(dpll_ref->dpll, ppin); + ret = ops->state_on_pin_get(pin, + dpll_pin_on_pin_priv(ppin, pin), + ppin, parent_priv, &state, extack); + if (ret) + return ret; + nest = nla_nest_start(msg, DPLL_A_PIN_PARENT_PIN); + if (!nest) + return -EMSGSIZE; + ret = dpll_msg_add_dev_parent_handle(msg, ppin->id); + if (ret) + goto nest_cancel; + if (nla_put_u32(msg, DPLL_A_PIN_STATE, state)) { + ret = -EMSGSIZE; + goto nest_cancel; + } + nla_nest_end(msg, nest); + } + + return 0; + +nest_cancel: + nla_nest_cancel(msg, nest); + return ret; +} + +static int +dpll_msg_add_pin_dplls(struct sk_buff *msg, struct dpll_pin *pin, + 
struct netlink_ext_ack *extack) +{ + struct dpll_pin_ref *ref; + struct nlattr *attr; + unsigned long index; + int ret; + + xa_for_each(&pin->dpll_refs, index, ref) { + attr = nla_nest_start(msg, DPLL_A_PIN_PARENT_DEVICE); + if (!attr) + return -EMSGSIZE; + ret = dpll_msg_add_dev_parent_handle(msg, ref->dpll->id); + if (ret) + goto nest_cancel; + ret = dpll_msg_add_pin_on_dpll_state(msg, pin, ref, extack); + if (ret) + goto nest_cancel; + ret = dpll_msg_add_pin_prio(msg, pin, ref, extack); + if (ret) + goto nest_cancel; + ret = dpll_msg_add_pin_direction(msg, pin, ref, extack); + if (ret) + goto nest_cancel; + nla_nest_end(msg, attr); + } + + return 0; + +nest_cancel: + nla_nest_end(msg, attr); + return ret; +} + +static int +dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_properties *prop = pin->prop; + struct dpll_pin_ref *ref; + int ret; + + ref = dpll_xa_ref_dpll_first(&pin->dpll_refs); + ASSERT_NOT_NULL(ref); + + ret = dpll_msg_add_pin_handle(msg, pin); + if (ret) + return ret; + if (nla_put_string(msg, DPLL_A_PIN_MODULE_NAME, + module_name(pin->module))) + return -EMSGSIZE; + if (nla_put_64bit(msg, DPLL_A_PIN_CLOCK_ID, sizeof(pin->clock_id), + &pin->clock_id, DPLL_A_PIN_PAD)) + return -EMSGSIZE; + if (prop->board_label && + nla_put_string(msg, DPLL_A_PIN_BOARD_LABEL, prop->board_label)) + return -EMSGSIZE; + if (prop->panel_label && + nla_put_string(msg, DPLL_A_PIN_PANEL_LABEL, prop->panel_label)) + return -EMSGSIZE; + if (prop->package_label && + nla_put_string(msg, DPLL_A_PIN_PACKAGE_LABEL, + prop->package_label)) + return -EMSGSIZE; + if (nla_put_u32(msg, DPLL_A_PIN_TYPE, prop->type)) + return -EMSGSIZE; + if (nla_put_u32(msg, DPLL_A_PIN_CAPABILITIES, prop->capabilities)) + return -EMSGSIZE; + ret = dpll_msg_add_pin_freq(msg, pin, ref, extack); + if (ret) + return ret; + if (xa_empty(&pin->parent_refs)) + ret = dpll_msg_add_pin_dplls(msg, pin, extack); + else + ret = 
dpll_msg_add_pin_parents(msg, pin, ref, extack); + + return ret; +} + +static int +dpll_device_get_one(struct dpll_device *dpll, struct sk_buff *msg, + struct netlink_ext_ack *extack) +{ + int ret; + + ret = dpll_msg_add_dev_handle(msg, dpll); + if (ret) + return ret; + if (nla_put_string(msg, DPLL_A_MODULE_NAME, module_name(dpll->module))) + return -EMSGSIZE; + if (nla_put_64bit(msg, DPLL_A_CLOCK_ID, sizeof(dpll->clock_id), + &dpll->clock_id, DPLL_A_PAD)) + return -EMSGSIZE; + ret = dpll_msg_add_temp(msg, dpll, extack); + if (ret) + return ret; + ret = dpll_msg_add_lock_status(msg, dpll, extack); + if (ret) + return ret; + ret = dpll_msg_add_mode(msg, dpll, extack); + if (ret) + return ret; + ret = dpll_msg_add_mode_supported(msg, dpll, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_TYPE, dpll->type)) + return -EMSGSIZE; + + return ret; +} + +static int +dpll_device_event_send(enum dpll_cmd event, struct dpll_device *dpll) +{ + struct sk_buff *msg; + int ret = -ENOMEM; + void *hdr; + + if (WARN_ON(!xa_get_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED))) + return -ENODEV; + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = genlmsg_put(msg, 0, 0, &dpll_nl_family, 0, event); + if (!hdr) + goto err_free_msg; + ret = dpll_device_get_one(dpll, msg, NULL); + if (ret) + goto err_cancel_msg; + genlmsg_end(msg, hdr); + genlmsg_multicast(&dpll_nl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +err_cancel_msg: + genlmsg_cancel(msg, hdr); +err_free_msg: + nlmsg_free(msg); + + return ret; +} + +int dpll_device_create_ntf(struct dpll_device *dpll) +{ + return dpll_device_event_send(DPLL_CMD_DEVICE_CREATE_NTF, dpll); +} + +int dpll_device_delete_ntf(struct dpll_device *dpll) +{ + return dpll_device_event_send(DPLL_CMD_DEVICE_DELETE_NTF, dpll); +} + +static int +__dpll_device_change_ntf(struct dpll_device *dpll) +{ + return dpll_device_event_send(DPLL_CMD_DEVICE_CHANGE_NTF, dpll); +} + +/** + * dpll_device_change_ntf - notify 
that the dpll device has been changed + * @dpll: registered dpll pointer + * + * Context: acquires and holds a dpll_lock. + * Return: 0 if succeeds, error code otherwise. + */ +int dpll_device_change_ntf(struct dpll_device *dpll) +{ + int ret; + + mutex_lock(&dpll_lock); + ret = __dpll_device_change_ntf(dpll); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_device_change_ntf); + +static int +dpll_pin_event_send(enum dpll_cmd event, struct dpll_pin *pin) +{ + struct sk_buff *msg; + int ret = -ENOMEM; + void *hdr; + + if (WARN_ON(!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED))) + return -ENODEV; + + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &dpll_nl_family, 0, event); + if (!hdr) + goto err_free_msg; + ret = dpll_cmd_pin_get_one(msg, pin, NULL); + if (ret) + goto err_cancel_msg; + genlmsg_end(msg, hdr); + genlmsg_multicast(&dpll_nl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +err_cancel_msg: + genlmsg_cancel(msg, hdr); +err_free_msg: + nlmsg_free(msg); + + return ret; +} + +int dpll_pin_create_ntf(struct dpll_pin *pin) +{ + return dpll_pin_event_send(DPLL_CMD_PIN_CREATE_NTF, pin); +} + +int dpll_pin_delete_ntf(struct dpll_pin *pin) +{ + return dpll_pin_event_send(DPLL_CMD_PIN_DELETE_NTF, pin); +} + +static int __dpll_pin_change_ntf(struct dpll_pin *pin) +{ + return dpll_pin_event_send(DPLL_CMD_PIN_CHANGE_NTF, pin); +} + +/** + * dpll_pin_change_ntf - notify that the pin has been changed + * @pin: registered pin pointer + * + * Context: acquires and holds a dpll_lock. + * Return: 0 if succeeds, error code otherwise. 
+ */ +int dpll_pin_change_ntf(struct dpll_pin *pin) +{ + int ret; + + mutex_lock(&dpll_lock); + ret = __dpll_pin_change_ntf(pin); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_pin_change_ntf); + +static int +dpll_pin_freq_set(struct dpll_pin *pin, struct nlattr *a, + struct netlink_ext_ack *extack) +{ + u64 freq = nla_get_u64(a); + struct dpll_pin_ref *ref; + unsigned long i; + int ret; + + if (!dpll_pin_is_freq_supported(pin, freq)) { + NL_SET_ERR_MSG_ATTR(extack, a, "frequency is not supported by the device"); + return -EINVAL; + } + + xa_for_each(&pin->dpll_refs, i, ref) { + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + + if (!ops->frequency_set) + return -EOPNOTSUPP; + ret = ops->frequency_set(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), freq, extack); + if (ret) + return ret; + } + __dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_on_pin_state_set(struct dpll_pin *pin, u32 parent_idx, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + struct dpll_pin_ref *parent_ref; + const struct dpll_pin_ops *ops; + struct dpll_pin_ref *dpll_ref; + void *pin_priv, *parent_priv; + struct dpll_pin *parent; + unsigned long i; + int ret; + + if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE & + pin->prop->capabilities)) { + NL_SET_ERR_MSG(extack, "state changing is not allowed"); + return -EOPNOTSUPP; + } + parent = xa_load(&dpll_pin_xa, parent_idx); + if (!parent) + return -EINVAL; + parent_ref = xa_load(&pin->parent_refs, parent->pin_idx); + if (!parent_ref) + return -EINVAL; + xa_for_each(&parent->dpll_refs, i, dpll_ref) { + ops = dpll_pin_ops(parent_ref); + if (!ops->state_on_pin_set) + return -EOPNOTSUPP; + pin_priv = dpll_pin_on_pin_priv(parent, pin); + parent_priv = dpll_pin_on_dpll_priv(dpll_ref->dpll, parent); + ret = ops->state_on_pin_set(pin, pin_priv, parent, parent_priv, + state, extack); + if (ret) + return ret; + } + 
__dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_state_set(struct dpll_device *dpll, struct dpll_pin *pin, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops; + struct dpll_pin_ref *ref; + int ret; + + if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE & + pin->prop->capabilities)) { + NL_SET_ERR_MSG(extack, "state changing is not allowed"); + return -EOPNOTSUPP; + } + ref = xa_load(&pin->dpll_refs, dpll->id); + ASSERT_NOT_NULL(ref); + ops = dpll_pin_ops(ref); + if (!ops->state_on_dpll_set) + return -EOPNOTSUPP; + ret = ops->state_on_dpll_set(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), state, extack); + if (ret) + return ret; + __dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_prio_set(struct dpll_device *dpll, struct dpll_pin *pin, + u32 prio, struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops; + struct dpll_pin_ref *ref; + int ret; + + if (!(DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE & + pin->prop->capabilities)) { + NL_SET_ERR_MSG(extack, "prio changing is not allowed"); + return -EOPNOTSUPP; + } + ref = xa_load(&pin->dpll_refs, dpll->id); + ASSERT_NOT_NULL(ref); + ops = dpll_pin_ops(ref); + if (!ops->prio_set) + return -EOPNOTSUPP; + ret = ops->prio_set(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, + dpll_priv(dpll), prio, extack); + if (ret) + return ret; + __dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll, + enum dpll_pin_direction direction, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops; + struct dpll_pin_ref *ref; + int ret; + + if (!(DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE & + pin->prop->capabilities)) { + NL_SET_ERR_MSG(extack, "direction changing is not allowed"); + return -EOPNOTSUPP; + } + ref = xa_load(&pin->dpll_refs, dpll->id); + ASSERT_NOT_NULL(ref); + ops = dpll_pin_ops(ref); + if (!ops->direction_set) + return -EOPNOTSUPP; + 
ret = ops->direction_set(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), direction, extack); + if (ret) + return ret; + __dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_parent_device_set(struct dpll_pin *pin, struct nlattr *parent_nest, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[DPLL_A_PIN_MAX + 1]; + enum dpll_pin_direction direction; + enum dpll_pin_state state; + struct dpll_pin_ref *ref; + struct dpll_device *dpll; + u32 pdpll_idx, prio; + int ret; + + nla_parse_nested(tb, DPLL_A_PIN_MAX, parent_nest, + dpll_pin_parent_device_nl_policy, extack); + if (!tb[DPLL_A_PIN_PARENT_ID]) { + NL_SET_ERR_MSG(extack, "device parent id expected"); + return -EINVAL; + } + pdpll_idx = nla_get_u32(tb[DPLL_A_PIN_PARENT_ID]); + dpll = xa_load(&dpll_device_xa, pdpll_idx); + if (!dpll) { + NL_SET_ERR_MSG(extack, "parent device not found"); + return -EINVAL; + } + ref = xa_load(&pin->dpll_refs, dpll->id); + if (!ref) { + NL_SET_ERR_MSG(extack, "pin not connected to given parent device"); + return -EINVAL; + } + if (tb[DPLL_A_PIN_STATE]) { + state = nla_get_u32(tb[DPLL_A_PIN_STATE]); + ret = dpll_pin_state_set(dpll, pin, state, extack); + if (ret) + return ret; + } + if (tb[DPLL_A_PIN_PRIO]) { + prio = nla_get_u32(tb[DPLL_A_PIN_PRIO]); + ret = dpll_pin_prio_set(dpll, pin, prio, extack); + if (ret) + return ret; + } + if (tb[DPLL_A_PIN_DIRECTION]) { + direction = nla_get_u32(tb[DPLL_A_PIN_DIRECTION]); + ret = dpll_pin_direction_set(pin, dpll, direction, extack); + if (ret) + return ret; + } + return 0; +} + +static int +dpll_pin_parent_pin_set(struct dpll_pin *pin, struct nlattr *parent_nest, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[DPLL_A_PIN_MAX + 1]; + enum dpll_pin_state state; + u32 ppin_idx; + int ret; + + nla_parse_nested(tb, DPLL_A_PIN_MAX, parent_nest, + dpll_pin_parent_pin_nl_policy, extack); + if (!tb[DPLL_A_PIN_PARENT_ID]) { + NL_SET_ERR_MSG(extack, "device parent id expected"); + return -EINVAL; + } + 
ppin_idx = nla_get_u32(tb[DPLL_A_PIN_PARENT_ID]); + state = nla_get_u32(tb[DPLL_A_PIN_STATE]); + ret = dpll_pin_on_pin_state_set(pin, ppin_idx, state, extack); + if (ret) + return ret; + + return 0; +} + +static int +dpll_pin_set_from_nlattr(struct dpll_pin *pin, struct genl_info *info) +{ + struct nlattr *a; + int rem, ret; + + nla_for_each_attr(a, genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + switch (nla_type(a)) { + case DPLL_A_PIN_FREQUENCY: + ret = dpll_pin_freq_set(pin, a, info->extack); + if (ret) + return ret; + break; + case DPLL_A_PIN_PARENT_DEVICE: + ret = dpll_pin_parent_device_set(pin, a, info->extack); + if (ret) + return ret; + break; + case DPLL_A_PIN_PARENT_PIN: + ret = dpll_pin_parent_pin_set(pin, a, info->extack); + if (ret) + return ret; + break; + } + } + + return 0; +} + +static struct dpll_pin * +dpll_pin_find(u64 clock_id, struct nlattr *mod_name_attr, + enum dpll_pin_type type, struct nlattr *board_label, + struct nlattr *panel_label, struct nlattr *package_label, + struct netlink_ext_ack *extack) +{ + bool board_match, panel_match, package_match; + struct dpll_pin *pin_match = NULL, *pin; + const struct dpll_pin_properties *prop; + bool cid_match, mod_match, type_match; + unsigned long i; + + xa_for_each_marked(&dpll_pin_xa, i, pin, DPLL_REGISTERED) { + prop = pin->prop; + cid_match = clock_id ? pin->clock_id == clock_id : true; + mod_match = mod_name_attr && module_name(pin->module) ? + !nla_strcmp(mod_name_attr, + module_name(pin->module)) : true; + type_match = type ? prop->type == type : true; + board_match = board_label ? (prop->board_label ? + !nla_strcmp(board_label, prop->board_label) : false) : + true; + panel_match = panel_label ? (prop->panel_label ? + !nla_strcmp(panel_label, prop->panel_label) : false) : + true; + package_match = package_label ? (prop->package_label ? 
+ !nla_strcmp(package_label, prop->package_label) : + false) : true; + if (cid_match && mod_match && type_match && board_match && + panel_match && package_match) { + if (pin_match) { + NL_SET_ERR_MSG(extack, "multiple matches"); + return ERR_PTR(-EINVAL); + } + pin_match = pin; + }; + } + if (!pin_match) { + NL_SET_ERR_MSG(extack, "not found"); + return ERR_PTR(-ENODEV); + } + return pin_match; +} + +static struct dpll_pin *dpll_pin_find_from_nlattr(struct genl_info *info) +{ + struct nlattr *attr, *mod_name_attr = NULL, *board_label_attr = NULL, + *panel_label_attr = NULL, *package_label_attr = NULL; + enum dpll_pin_type type = 0; + u64 clock_id = 0; + int rem = 0; + + nla_for_each_attr(attr, genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + switch (nla_type(attr)) { + case DPLL_A_PIN_CLOCK_ID: + if (clock_id) + goto duplicated_attr; + clock_id = nla_get_u64(attr); + break; + case DPLL_A_PIN_MODULE_NAME: + if (mod_name_attr) + goto duplicated_attr; + mod_name_attr = attr; + break; + case DPLL_A_PIN_TYPE: + if (type) + goto duplicated_attr; + type = nla_get_u32(attr); + break; + case DPLL_A_PIN_BOARD_LABEL: + if (board_label_attr) + goto duplicated_attr; + board_label_attr = attr; + break; + case DPLL_A_PIN_PANEL_LABEL: + if (panel_label_attr) + goto duplicated_attr; + panel_label_attr = attr; + break; + case DPLL_A_PIN_PACKAGE_LABEL: + if (package_label_attr) + goto duplicated_attr; + package_label_attr = attr; + break; + default: + break; + } + } + if (!(clock_id || mod_name_attr || board_label_attr || + panel_label_attr || package_label_attr)) { + NL_SET_ERR_MSG(info->extack, "missing attributes"); + return ERR_PTR(-EINVAL); + } + return dpll_pin_find(clock_id, mod_name_attr, type, board_label_attr, + panel_label_attr, package_label_attr, + info->extack); +duplicated_attr: + NL_SET_ERR_MSG(info->extack, "duplicated attribute"); + return ERR_PTR(-EINVAL); +} + +int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct 
dpll_pin *pin; + struct sk_buff *msg; + struct nlattr *hdr; + int ret; + + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, + DPLL_CMD_PIN_ID_GET); + if (!hdr) + return -EMSGSIZE; + + pin = dpll_pin_find_from_nlattr(info); + if (!IS_ERR(pin)) { + ret = dpll_msg_add_pin_handle(msg, pin); + if (ret) { + nlmsg_free(msg); + return ret; + } + } + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); +} + +int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct dpll_pin *pin = info->user_ptr[0]; + struct sk_buff *msg; + struct nlattr *hdr; + int ret; + + if (!pin) + return -ENODEV; + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, + DPLL_CMD_PIN_GET); + if (!hdr) + return -EMSGSIZE; + ret = dpll_cmd_pin_get_one(msg, pin, info->extack); + if (ret) { + nlmsg_free(msg); + return ret; + } + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); +} + +int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct dpll_dump_ctx *ctx = dpll_dump_context(cb); + struct dpll_pin *pin; + struct nlattr *hdr; + unsigned long i; + int ret = 0; + + xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED, + ctx->idx) { + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + &dpll_nl_family, NLM_F_MULTI, + DPLL_CMD_PIN_GET); + if (!hdr) { + ret = -EMSGSIZE; + break; + } + ret = dpll_cmd_pin_get_one(skb, pin, cb->extack); + if (ret) { + genlmsg_cancel(skb, hdr); + break; + } + genlmsg_end(skb, hdr); + } + if (ret == -EMSGSIZE) { + ctx->idx = i; + return skb->len; + } + return ret; +} + +int dpll_nl_pin_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct dpll_pin *pin = info->user_ptr[0]; + + return dpll_pin_set_from_nlattr(pin, info); +} + +static struct dpll_device * +dpll_device_find(u64 clock_id, struct nlattr 
*mod_name_attr, + enum dpll_type type, struct netlink_ext_ack *extack) +{ + struct dpll_device *dpll_match = NULL, *dpll; + bool cid_match, mod_match, type_match; + unsigned long i; + + xa_for_each_marked(&dpll_device_xa, i, dpll, DPLL_REGISTERED) { + cid_match = clock_id ? dpll->clock_id == clock_id : true; + mod_match = mod_name_attr ? (module_name(dpll->module) ? + !nla_strcmp(mod_name_attr, + module_name(dpll->module)) : false) : true; + type_match = type ? dpll->type == type : true; + if (cid_match && mod_match && type_match) { + if (dpll_match) { + NL_SET_ERR_MSG(extack, "multiple matches"); + return ERR_PTR(-EINVAL); + } + dpll_match = dpll; + } + } + if (!dpll_match) { + NL_SET_ERR_MSG(extack, "not found"); + return ERR_PTR(-ENODEV); + } + + return dpll_match; +} + +static struct dpll_device * +dpll_device_find_from_nlattr(struct genl_info *info) +{ + struct nlattr *attr, *mod_name_attr = NULL; + enum dpll_type type = 0; + u64 clock_id = 0; + int rem = 0; + + nla_for_each_attr(attr, genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + switch (nla_type(attr)) { + case DPLL_A_CLOCK_ID: + if (clock_id) + goto duplicated_attr; + clock_id = nla_get_u64(attr); + break; + case DPLL_A_MODULE_NAME: + if (mod_name_attr) + goto duplicated_attr; + mod_name_attr = attr; + break; + case DPLL_A_TYPE: + if (type) + goto duplicated_attr; + type = nla_get_u32(attr); + break; + default: + break; + } + } + if (!clock_id && !mod_name_attr && !type) { + NL_SET_ERR_MSG(info->extack, "missing attributes"); + return ERR_PTR(-EINVAL); + } + return dpll_device_find(clock_id, mod_name_attr, type, info->extack); +duplicated_attr: + NL_SET_ERR_MSG(info->extack, "duplicated attribute"); + return ERR_PTR(-EINVAL); +} + +int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct dpll_device *dpll; + struct sk_buff *msg; + struct nlattr *hdr; + int ret; + + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = 
genlmsg_put_reply(msg, info, &dpll_nl_family, 0, + DPLL_CMD_DEVICE_ID_GET); + if (!hdr) + return -EMSGSIZE; + + dpll = dpll_device_find_from_nlattr(info); + if (!IS_ERR(dpll)) { + ret = dpll_msg_add_dev_handle(msg, dpll); + if (ret) { + nlmsg_free(msg); + return ret; + } + } + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); +} + +int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct dpll_device *dpll = info->user_ptr[0]; + struct sk_buff *msg; + struct nlattr *hdr; + int ret; + + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, + DPLL_CMD_DEVICE_GET); + if (!hdr) + return -EMSGSIZE; + + ret = dpll_device_get_one(dpll, msg, info->extack); + if (ret) { + nlmsg_free(msg); + return ret; + } + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); +} + +int dpll_nl_device_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + /* placeholder for set command */ + return 0; +} + +int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct dpll_dump_ctx *ctx = dpll_dump_context(cb); + struct dpll_device *dpll; + struct nlattr *hdr; + unsigned long i; + int ret = 0; + + xa_for_each_marked_start(&dpll_device_xa, i, dpll, DPLL_REGISTERED, + ctx->idx) { + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &dpll_nl_family, + NLM_F_MULTI, DPLL_CMD_DEVICE_GET); + if (!hdr) { + ret = -EMSGSIZE; + break; + } + ret = dpll_device_get_one(dpll, skb, cb->extack); + if (ret) { + genlmsg_cancel(skb, hdr); + break; + } + genlmsg_end(skb, hdr); + } + if (ret == -EMSGSIZE) { + ctx->idx = i; + return skb->len; + } + return ret; +} + +int dpll_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + u32 id; + + if (GENL_REQ_ATTR_CHECK(info, DPLL_A_ID)) + return -EINVAL; + + mutex_lock(&dpll_lock); + id = nla_get_u32(info->attrs[DPLL_A_ID]); + info->user_ptr[0] = 
dpll_device_get_by_id(id); + if (!info->user_ptr[0]) { + NL_SET_ERR_MSG(info->extack, "device not found"); + goto unlock; + } + return 0; +unlock: + mutex_unlock(&dpll_lock); + return -ENODEV; +} + +void dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + mutex_unlock(&dpll_lock); +} + +int +dpll_lock_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + mutex_lock(&dpll_lock); + + return 0; +} + +void +dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + mutex_unlock(&dpll_lock); +} + +int dpll_lock_dumpit(struct netlink_callback *cb) +{ + mutex_lock(&dpll_lock); + + return 0; +} + +int dpll_unlock_dumpit(struct netlink_callback *cb) +{ + mutex_unlock(&dpll_lock); + + return 0; +} + +int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + int ret; + + mutex_lock(&dpll_lock); + if (GENL_REQ_ATTR_CHECK(info, DPLL_A_PIN_ID)) { + ret = -EINVAL; + goto unlock_dev; + } + info->user_ptr[0] = xa_load(&dpll_pin_xa, + nla_get_u32(info->attrs[DPLL_A_PIN_ID])); + if (!info->user_ptr[0]) { + NL_SET_ERR_MSG(info->extack, "pin not found"); + ret = -ENODEV; + goto unlock_dev; + } + + return 0; + +unlock_dev: + mutex_unlock(&dpll_lock); + return ret; +} + +void dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + mutex_unlock(&dpll_lock); +} diff --git a/drivers/dpll/dpll_netlink.h b/drivers/dpll/dpll_netlink.h new file mode 100644 index 000000000000..a9cfd55f57fc --- /dev/null +++ b/drivers/dpll/dpll_netlink.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. 
and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +int dpll_device_create_ntf(struct dpll_device *dpll); + +int dpll_device_delete_ntf(struct dpll_device *dpll); + +int dpll_pin_create_ntf(struct dpll_pin *pin); + +int dpll_pin_delete_ntf(struct dpll_pin *pin); diff --git a/include/linux/dpll.h b/include/linux/dpll.h index b47c3560b937..2202310c10cd 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -130,4 +130,8 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, const struct dpll_pin_ops *ops, void *priv); +int dpll_device_change_ntf(struct dpll_device *dpll); + +int dpll_pin_change_ntf(struct dpll_pin *pin); + #endif -- cgit v1.2.3 From 5f18426928800c59fb0f9bc8fb0c182bb6f5ee24 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 21:49:39 +0100 Subject: netdev: expose DPLL pin handle for netdevice In case netdevice represents a SyncE port, the user needs to understand the connection between netdevice and associated DPLL pin. There might be multiple netdevices pointing to the same pin, in case of VF/SF implementation. Add an IFLA Netlink attribute to nest the DPLL pin handle, similar to how it is implemented for devlink port. Add a struct dpll_pin pointer to netdev and protect access to it by RTNL. Expose netdev_dpll_pin_set() and netdev_dpll_pin_clear() helpers to the drivers so they can set/clear the DPLL pin relationship to netdev. Note that during the lifetime of struct dpll_pin the pin handle does not change. Therefore it is safe to access it locklessly. It is the driver's responsibility to call netdev_dpll_pin_clear() before dpll_pin_put(). Signed-off-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Vadim Fedorenko Signed-off-by: David S. 
Miller --- drivers/dpll/dpll_netlink.c | 16 ++++++++++++++-- include/linux/dpll.h | 15 +++++++++++++++ include/linux/netdevice.h | 21 +++++++++++++++++++++ include/uapi/linux/if_link.h | 2 +- net/core/dev.c | 22 ++++++++++++++++++++++ net/core/rtnetlink.c | 36 ++++++++++++++++++++++++++++++++++++ 6 files changed, 109 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 9464a6865977..764437a0661b 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -47,6 +47,18 @@ dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) return 0; } +/** + * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin + * @pin: pin pointer + * + * Return: byte size of pin handle attribute for given pin. + */ +size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +{ + return pin ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ +} +EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); + /** * dpll_msg_add_pin_handle - attach pin handle attribute to a given message * @msg: pointer to sk_buff message to attach a pin handle @@ -56,8 +68,7 @@ dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) * * 0 - success * * -EMSGSIZE - no space in message to attach pin handle */ -static int -dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) { if (!pin) return 0; @@ -65,6 +76,7 @@ dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) return -EMSGSIZE; return 0; } +EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle); static int dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll, diff --git a/include/linux/dpll.h b/include/linux/dpll.h index 2202310c10cd..bbc480cd2932 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -101,6 +101,21 @@ struct dpll_pin_properties { struct dpll_pin_frequency *freq_supported; }; +#if IS_ENABLED(CONFIG_DPLL) +size_t 
dpll_msg_pin_handle_size(struct dpll_pin *pin); +int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin); +#else +static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +{ + return 0; +} + +static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +{ + return 0; +} +#endif + struct dpll_device * dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0896aaa91dd7..db3d8429d50d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,6 +79,8 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; +/* DPLL specific */ +struct dpll_pin; typedef u32 xdp_features_t; @@ -2049,6 +2051,9 @@ enum netdev_ml_priv_type { * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * + * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, + * where the clock is recovered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. 
*/ @@ -2405,6 +2410,10 @@ struct net_device { struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port; + +#if IS_ENABLED(CONFIG_DPLL) + struct dpll_pin *dpll_pin; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -3940,6 +3949,18 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); +void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void netdev_dpll_pin_clear(struct net_device *dev); + +static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_DPLL) + return dev->dpll_pin; +#else + return NULL; +#endif +} + struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ce3117df9cec..fac351a93aed 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -376,7 +376,7 @@ enum { IFLA_GSO_IPV4_MAX_SIZE, IFLA_GRO_IPV4_MAX_SIZE, - + IFLA_DPLL_PIN, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index ccff2b6ef958..cc03a5758d2d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9023,6 +9023,28 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); +static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) +{ +#if IS_ENABLED(CONFIG_DPLL) + rtnl_lock(); + dev->dpll_pin = dpll_pin; + rtnl_unlock(); +#endif +} + +void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + WARN_ON(!dpll_pin); + netdev_dpll_pin_assign(dev, dpll_pin); +} +EXPORT_SYMBOL(netdev_dpll_pin_set); + +void 
netdev_dpll_pin_clear(struct net_device *dev) +{ + netdev_dpll_pin_assign(dev, NULL); +} +EXPORT_SYMBOL(netdev_dpll_pin_clear); + /** * dev_change_proto_down - set carrier according to proto_down. * diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4a2ec33bfb51..7452a6d190c5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -57,6 +57,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include #endif +#include #include "dev.h" @@ -1055,6 +1056,15 @@ static size_t rtnl_devlink_port_size(const struct net_device *dev) return size; } +static size_t rtnl_dpll_pin_size(const struct net_device *dev) +{ + size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */ + + size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev)); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1111,6 +1121,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_prop_list_size(dev) + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + rtnl_devlink_port_size(dev) + + rtnl_dpll_pin_size(dev) + 0; } @@ -1774,6 +1785,28 @@ nest_cancel: return ret; } +static int rtnl_fill_dpll_pin(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *dpll_pin_nest; + int ret; + + dpll_pin_nest = nla_nest_start(skb, IFLA_DPLL_PIN); + if (!dpll_pin_nest) + return -EMSGSIZE; + + ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev)); + if (ret < 0) + goto nest_cancel; + + nla_nest_end(skb, dpll_pin_nest); + return 0; + +nest_cancel: + nla_nest_cancel(skb, dpll_pin_nest); + return ret; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, @@ -1916,6 +1949,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (rtnl_fill_devlink_port(skb, dev)) goto nla_put_failure; + if (rtnl_fill_dpll_pin(skb, dev)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; -- cgit v1.2.3 From 496fd0a26bbf73b6b12407ee4fbe5ff49d659a6d 
Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 21:49:43 +0100 Subject: mlx5: Implement SyncE support using DPLL infrastructure Implement SyncE support using newly introduced DPLL support. Make sure that each PFs/VFs/SFs probed with appropriate capability will spawn a dpll auxiliary device and register appropriate dpll device and pin instances. Signed-off-by: Jiri Pirko Signed-off-by: Arkadiusz Kubalewski Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 8 + drivers/net/ethernet/mellanox/mlx5/core/Makefile | 3 + drivers/net/ethernet/mellanox/mlx5/core/dev.c | 17 + drivers/net/ethernet/mellanox/mlx5/core/dpll.c | 432 +++++++++++++++++++++++ include/linux/mlx5/driver.h | 2 + include/linux/mlx5/mlx5_ifc.h | 59 +++- 6 files changed, 520 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/dpll.c (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index c4f4de82e29e..685335832a93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -189,3 +189,11 @@ config MLX5_SF_MANAGER port is managed through devlink. A subfunction supports RDMA, netdevice and vdpa device. It is similar to a SRIOV VF but it doesn't require SRIOV support. + +config MLX5_DPLL + tristate "Mellanox 5th generation network adapters (ConnectX series) DPLL support" + depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + select DPLL + help + DPLL support in Mellanox Technologies ConnectX NICs. 
+ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 7e94caca4888..c44870b175f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -128,3 +128,6 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_ # SF manager # mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o + +obj-$(CONFIG_MLX5_DPLL) += mlx5_dpll.o +mlx5_dpll-y := dpll.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 7909f378dc93..1fc03480c2ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -206,6 +206,19 @@ static bool is_ib_enabled(struct mlx5_core_dev *dev) return err ? false : val.vbool; } +static bool is_dpll_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_DPLL)) + return false; + + if (!MLX5_CAP_MCAM_REG2(dev, synce_registers)) { + mlx5_core_warn(dev, "Missing SyncE capability\n"); + return false; + } + + return true; +} + enum { MLX5_INTERFACE_PROTOCOL_ETH, MLX5_INTERFACE_PROTOCOL_ETH_REP, @@ -215,6 +228,8 @@ enum { MLX5_INTERFACE_PROTOCOL_MPIB, MLX5_INTERFACE_PROTOCOL_VNET, + + MLX5_INTERFACE_PROTOCOL_DPLL, }; static const struct mlx5_adev_device { @@ -237,6 +252,8 @@ static const struct mlx5_adev_device { .is_supported = &is_ib_rep_supported }, [MLX5_INTERFACE_PROTOCOL_MPIB] = { .suffix = "multiport", .is_supported = &is_mp_supported }, + [MLX5_INTERFACE_PROTOCOL_DPLL] = { .suffix = "dpll", + .is_supported = &is_dpll_supported }, }; int mlx5_adev_idx_alloc(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c new file mode 100644 index 000000000000..74f0c7867120 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* 
Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include + +/* This structure represents a reference to DPLL, one is created + * per mdev instance. + */ +struct mlx5_dpll { + struct dpll_device *dpll; + struct dpll_pin *dpll_pin; + struct mlx5_core_dev *mdev; + struct workqueue_struct *wq; + struct delayed_work work; + struct { + bool valid; + enum dpll_lock_status lock_status; + enum dpll_pin_state pin_state; + } last; + struct notifier_block mdev_nb; + struct net_device *tracking_netdev; +}; + +static int mlx5_dpll_clock_id_get(struct mlx5_core_dev *mdev, u64 *clock_id) +{ + u32 out[MLX5_ST_SZ_DW(msecq_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(msecq_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MSECQ, 0, 0); + if (err) + return err; + *clock_id = MLX5_GET64(msecq_reg, out, local_clock_identity); + return 0; +} + +static int +mlx5_dpll_synce_status_get(struct mlx5_core_dev *mdev, + enum mlx5_msees_admin_status *admin_status, + enum mlx5_msees_oper_status *oper_status, + bool *ho_acq) +{ + u32 out[MLX5_ST_SZ_DW(msees_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(msees_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MSEES, 0, 0); + if (err) + return err; + if (admin_status) + *admin_status = MLX5_GET(msees_reg, out, admin_status); + *oper_status = MLX5_GET(msees_reg, out, oper_status); + if (ho_acq) + *ho_acq = MLX5_GET(msees_reg, out, ho_acq); + return 0; +} + +static int +mlx5_dpll_synce_status_set(struct mlx5_core_dev *mdev, + enum mlx5_msees_admin_status admin_status) +{ + u32 out[MLX5_ST_SZ_DW(msees_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(msees_reg)] = {}; + + MLX5_SET(msees_reg, in, field_select, + MLX5_MSEES_FIELD_SELECT_ENABLE | + MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS); + MLX5_SET(msees_reg, in, admin_status, admin_status); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MSEES, 0, 1); +} + 
+static enum dpll_lock_status +mlx5_dpll_lock_status_get(enum mlx5_msees_oper_status oper_status, bool ho_acq) +{ + switch (oper_status) { + case MLX5_MSEES_OPER_STATUS_SELF_TRACK: + fallthrough; + case MLX5_MSEES_OPER_STATUS_OTHER_TRACK: + return ho_acq ? DPLL_LOCK_STATUS_LOCKED_HO_ACQ : + DPLL_LOCK_STATUS_LOCKED; + case MLX5_MSEES_OPER_STATUS_HOLDOVER: + fallthrough; + case MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER: + return DPLL_LOCK_STATUS_HOLDOVER; + default: + return DPLL_LOCK_STATUS_UNLOCKED; + } +} + +static enum dpll_pin_state +mlx5_dpll_pin_state_get(enum mlx5_msees_admin_status admin_status, + enum mlx5_msees_oper_status oper_status) +{ + return (admin_status == MLX5_MSEES_ADMIN_STATUS_TRACK && + (oper_status == MLX5_MSEES_OPER_STATUS_SELF_TRACK || + oper_status == MLX5_MSEES_OPER_STATUS_OTHER_TRACK)) ? + DPLL_PIN_STATE_CONNECTED : DPLL_PIN_STATE_DISCONNECTED; +} + +static int mlx5_dpll_device_lock_status_get(const struct dpll_device *dpll, + void *priv, + enum dpll_lock_status *status, + struct netlink_ext_ack *extack) +{ + enum mlx5_msees_oper_status oper_status; + struct mlx5_dpll *mdpll = priv; + bool ho_acq; + int err; + + err = mlx5_dpll_synce_status_get(mdpll->mdev, NULL, + &oper_status, &ho_acq); + if (err) + return err; + + *status = mlx5_dpll_lock_status_get(oper_status, ho_acq); + return 0; +} + +static int mlx5_dpll_device_mode_get(const struct dpll_device *dpll, + void *priv, + u32 *mode, struct netlink_ext_ack *extack) +{ + *mode = DPLL_MODE_MANUAL; + return 0; +} + +static bool mlx5_dpll_device_mode_supported(const struct dpll_device *dpll, + void *priv, + enum dpll_mode mode, + struct netlink_ext_ack *extack) +{ + return mode == DPLL_MODE_MANUAL; +} + +static const struct dpll_device_ops mlx5_dpll_device_ops = { + .lock_status_get = mlx5_dpll_device_lock_status_get, + .mode_get = mlx5_dpll_device_mode_get, + .mode_supported = mlx5_dpll_device_mode_supported, +}; + +static int mlx5_dpll_pin_direction_get(const struct dpll_pin *pin, + void 
*pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack) +{ + *direction = DPLL_PIN_DIRECTION_INPUT; + return 0; +} + +static int mlx5_dpll_state_on_dpll_get(const struct dpll_pin *pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack) +{ + enum mlx5_msees_admin_status admin_status; + enum mlx5_msees_oper_status oper_status; + struct mlx5_dpll *mdpll = pin_priv; + int err; + + err = mlx5_dpll_synce_status_get(mdpll->mdev, &admin_status, + &oper_status, NULL); + if (err) + return err; + *state = mlx5_dpll_pin_state_get(admin_status, oper_status); + return 0; +} + +static int mlx5_dpll_state_on_dpll_set(const struct dpll_pin *pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + struct mlx5_dpll *mdpll = pin_priv; + + return mlx5_dpll_synce_status_set(mdpll->mdev, + state == DPLL_PIN_STATE_CONNECTED ? 
+ MLX5_MSEES_ADMIN_STATUS_TRACK : + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING); +} + +static const struct dpll_pin_ops mlx5_dpll_pins_ops = { + .direction_get = mlx5_dpll_pin_direction_get, + .state_on_dpll_get = mlx5_dpll_state_on_dpll_get, + .state_on_dpll_set = mlx5_dpll_state_on_dpll_set, +}; + +static const struct dpll_pin_properties mlx5_dpll_pin_properties = { + .type = DPLL_PIN_TYPE_SYNCE_ETH_PORT, + .capabilities = DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE, +}; + +#define MLX5_DPLL_PERIODIC_WORK_INTERVAL 500 /* ms */ + +static void mlx5_dpll_periodic_work_queue(struct mlx5_dpll *mdpll) +{ + queue_delayed_work(mdpll->wq, &mdpll->work, + msecs_to_jiffies(MLX5_DPLL_PERIODIC_WORK_INTERVAL)); +} + +static void mlx5_dpll_periodic_work(struct work_struct *work) +{ + struct mlx5_dpll *mdpll = container_of(work, struct mlx5_dpll, + work.work); + enum mlx5_msees_admin_status admin_status; + enum mlx5_msees_oper_status oper_status; + enum dpll_lock_status lock_status; + enum dpll_pin_state pin_state; + bool ho_acq; + int err; + + err = mlx5_dpll_synce_status_get(mdpll->mdev, &admin_status, + &oper_status, &ho_acq); + if (err) + goto err_out; + lock_status = mlx5_dpll_lock_status_get(oper_status, ho_acq); + pin_state = mlx5_dpll_pin_state_get(admin_status, oper_status); + + if (!mdpll->last.valid) + goto invalid_out; + + if (mdpll->last.lock_status != lock_status) + dpll_device_change_ntf(mdpll->dpll); + if (mdpll->last.pin_state != pin_state) + dpll_pin_change_ntf(mdpll->dpll_pin); + +invalid_out: + mdpll->last.lock_status = lock_status; + mdpll->last.pin_state = pin_state; + mdpll->last.valid = true; +err_out: + mlx5_dpll_periodic_work_queue(mdpll); +} + +static void mlx5_dpll_netdev_dpll_pin_set(struct mlx5_dpll *mdpll, + struct net_device *netdev) +{ + if (mdpll->tracking_netdev) + return; + netdev_dpll_pin_set(netdev, mdpll->dpll_pin); + mdpll->tracking_netdev = netdev; +} + +static void mlx5_dpll_netdev_dpll_pin_clear(struct mlx5_dpll *mdpll) +{ + if 
(!mdpll->tracking_netdev) + return; + netdev_dpll_pin_clear(mdpll->tracking_netdev); + mdpll->tracking_netdev = NULL; +} + +static int mlx5_dpll_mdev_notifier_event(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct mlx5_dpll *mdpll = container_of(nb, struct mlx5_dpll, mdev_nb); + struct net_device *netdev = data; + + switch (event) { + case MLX5_DRIVER_EVENT_UPLINK_NETDEV: + if (netdev) + mlx5_dpll_netdev_dpll_pin_set(mdpll, netdev); + else + mlx5_dpll_netdev_dpll_pin_clear(mdpll); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static void mlx5_dpll_mdev_netdev_track(struct mlx5_dpll *mdpll, + struct mlx5_core_dev *mdev) +{ + mdpll->mdev_nb.notifier_call = mlx5_dpll_mdev_notifier_event; + mlx5_blocking_notifier_register(mdev, &mdpll->mdev_nb); + mlx5_core_uplink_netdev_event_replay(mdev); +} + +static void mlx5_dpll_mdev_netdev_untrack(struct mlx5_dpll *mdpll, + struct mlx5_core_dev *mdev) +{ + mlx5_blocking_notifier_unregister(mdev, &mdpll->mdev_nb); + mlx5_dpll_netdev_dpll_pin_clear(mdpll); +} + +static int mlx5_dpll_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = edev->mdev; + struct mlx5_dpll *mdpll; + u64 clock_id; + int err; + + err = mlx5_dpll_synce_status_set(mdev, + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING); + if (err) + return err; + + err = mlx5_dpll_clock_id_get(mdev, &clock_id); + if (err) + return err; + + mdpll = kzalloc(sizeof(*mdpll), GFP_KERNEL); + if (!mdpll) + return -ENOMEM; + mdpll->mdev = mdev; + auxiliary_set_drvdata(adev, mdpll); + + /* Multiple mdev instances might share one DPLL device. 
*/ + mdpll->dpll = dpll_device_get(clock_id, 0, THIS_MODULE); + if (IS_ERR(mdpll->dpll)) { + err = PTR_ERR(mdpll->dpll); + goto err_free_mdpll; + } + + err = dpll_device_register(mdpll->dpll, DPLL_TYPE_EEC, + &mlx5_dpll_device_ops, mdpll); + if (err) + goto err_put_dpll_device; + + /* Multiple mdev instances might share one DPLL pin. */ + mdpll->dpll_pin = dpll_pin_get(clock_id, mlx5_get_dev_index(mdev), + THIS_MODULE, &mlx5_dpll_pin_properties); + if (IS_ERR(mdpll->dpll_pin)) { + err = PTR_ERR(mdpll->dpll_pin); + goto err_unregister_dpll_device; + } + + err = dpll_pin_register(mdpll->dpll, mdpll->dpll_pin, + &mlx5_dpll_pins_ops, mdpll); + if (err) + goto err_put_dpll_pin; + + mdpll->wq = create_singlethread_workqueue("mlx5_dpll"); + if (!mdpll->wq) { + err = -ENOMEM; + goto err_unregister_dpll_pin; + } + + mlx5_dpll_mdev_netdev_track(mdpll, mdev); + + INIT_DELAYED_WORK(&mdpll->work, &mlx5_dpll_periodic_work); + mlx5_dpll_periodic_work_queue(mdpll); + + return 0; + +err_unregister_dpll_pin: + dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin, + &mlx5_dpll_pins_ops, mdpll); +err_put_dpll_pin: + dpll_pin_put(mdpll->dpll_pin); +err_unregister_dpll_device: + dpll_device_unregister(mdpll->dpll, &mlx5_dpll_device_ops, mdpll); +err_put_dpll_device: + dpll_device_put(mdpll->dpll); +err_free_mdpll: + kfree(mdpll); + return err; +} + +static void mlx5_dpll_remove(struct auxiliary_device *adev) +{ + struct mlx5_dpll *mdpll = auxiliary_get_drvdata(adev); + struct mlx5_core_dev *mdev = mdpll->mdev; + + cancel_delayed_work(&mdpll->work); + mlx5_dpll_mdev_netdev_untrack(mdpll, mdev); + destroy_workqueue(mdpll->wq); + dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin, + &mlx5_dpll_pins_ops, mdpll); + dpll_pin_put(mdpll->dpll_pin); + dpll_device_unregister(mdpll->dpll, &mlx5_dpll_device_ops, mdpll); + dpll_device_put(mdpll->dpll); + kfree(mdpll); + + mlx5_dpll_synce_status_set(mdev, + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING); +} + +static int mlx5_dpll_suspend(struct auxiliary_device 
*adev, pm_message_t state) +{ + return 0; +} + +static int mlx5_dpll_resume(struct auxiliary_device *adev) +{ + return 0; +} + +static const struct auxiliary_device_id mlx5_dpll_id_table[] = { + { .name = MLX5_ADEV_NAME ".dpll", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5_dpll_id_table); + +static struct auxiliary_driver mlx5_dpll_driver = { + .name = "dpll", + .probe = mlx5_dpll_probe, + .remove = mlx5_dpll_remove, + .suspend = mlx5_dpll_suspend, + .resume = mlx5_dpll_resume, + .id_table = mlx5_dpll_id_table, +}; + +static int __init mlx5_dpll_init(void) +{ + return auxiliary_driver_register(&mlx5_dpll_driver); +} + +static void __exit mlx5_dpll_exit(void) +{ + auxiliary_driver_unregister(&mlx5_dpll_driver); +} + +module_init(mlx5_dpll_init); +module_exit(mlx5_dpll_exit); + +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) DPLL driver"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3033bbaeac81..92434814c855 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -155,6 +155,8 @@ enum { MLX5_REG_MCC = 0x9062, MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, + MLX5_REG_MSECQ = 0x9155, + MLX5_REG_MSEES = 0x9156, MLX5_REG_MIRC = 0x9162, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fc3db401f8a2..dd8421d021cf 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10176,7 +10176,9 @@ struct mlx5_ifc_mcam_access_reg_bits2 { u8 mirc[0x1]; u8 regs_97_to_96[0x2]; - u8 regs_95_to_64[0x20]; + u8 regs_95_to_87[0x09]; + u8 synce_registers[0x2]; + u8 regs_84_to_64[0x15]; u8 regs_63_to_32[0x20]; @@ -12549,4 +12551,59 @@ struct mlx5_ifc_modify_page_track_obj_in_bits { struct mlx5_ifc_page_track_bits obj_context; }; +struct mlx5_ifc_msecq_reg_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x12]; + u8 
network_option[0x2]; + u8 local_ssm_code[0x4]; + u8 local_enhanced_ssm_code[0x8]; + + u8 local_clock_identity[0x40]; + + u8 reserved_at_80[0x180]; +}; + +enum { + MLX5_MSEES_FIELD_SELECT_ENABLE = BIT(0), + MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS = BIT(1), + MLX5_MSEES_FIELD_SELECT_ADMIN_FREQ_MEASURE = BIT(2), +}; + +enum mlx5_msees_admin_status { + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_ADMIN_STATUS_TRACK = 0x1, +}; + +enum mlx5_msees_oper_status { + MLX5_MSEES_OPER_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_OPER_STATUS_SELF_TRACK = 0x1, + MLX5_MSEES_OPER_STATUS_OTHER_TRACK = 0x2, + MLX5_MSEES_OPER_STATUS_HOLDOVER = 0x3, + MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER = 0x4, + MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5, +}; + +struct mlx5_ifc_msees_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 lp_msb[0x2]; + u8 reserved_at_14[0xc]; + + u8 field_select[0x20]; + + u8 admin_status[0x4]; + u8 oper_status[0x4]; + u8 ho_acq[0x1]; + u8 reserved_at_49[0xc]; + u8 admin_freq_measure[0x1]; + u8 oper_freq_measure[0x1]; + u8 failure_reason[0x9]; + + u8 frequency_diff[0x20]; + + u8 reserved_at_80[0x180]; +}; + #endif /* MLX5_IFC_H */ -- cgit v1.2.3 From ac5f395685bd16ca30c1c834dcbf8b555605ccae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 13 Sep 2023 09:12:40 +0200 Subject: net/mlx5: SF, Implement peer devlink set for SF representor devlink port Benefit from the existence of internal mlx5 notifier and extend it by event MLX5_DRIVER_EVENT_SF_PEER_DEVLINK. Use this event from SF auxiliary device probe/remove functions to pass the registered SF devlink instance to the SF representor. Process the new event in SF representor code and call devl_port_fn_devlink_set() to do the assignments. Implement this in work to avoid possible deadlock when probe/remove function of SF may be called with devlink instance lock held during devlink reload. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/sf/dev/dev.h | 6 ++++ .../ethernet/mellanox/mlx5/core/sf/dev/driver.c | 26 +++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/sf/devlink.c | 34 ++++++++++++++++++++++ include/linux/mlx5/device.h | 1 + 4 files changed, 67 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h index 2a66a427ef15..b99131e95e37 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h @@ -19,6 +19,12 @@ struct mlx5_sf_dev { u16 fn_id; }; +struct mlx5_sf_peer_devlink_event_ctx { + u16 fn_id; + struct devlink *devlink; + int err; +}; + void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev); void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 8fe82f1191bb..169c2c68ed5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -8,6 +8,20 @@ #include "dev.h" #include "devlink.h" +static int mlx5_core_peer_devlink_set(struct mlx5_sf_dev *sf_dev, struct devlink *devlink) +{ + struct mlx5_sf_peer_devlink_event_ctx event_ctx = { + .fn_id = sf_dev->fn_id, + .devlink = devlink, + }; + int ret; + + ret = mlx5_blocking_notifier_call_chain(sf_dev->parent_mdev, + MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, + &event_ctx); + return ret == NOTIFY_OK ? 
event_ctx.err : 0; +} + static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); @@ -54,9 +68,21 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err); goto init_one_err; } + + err = mlx5_core_peer_devlink_set(sf_dev, devlink); + if (err) { + mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err); + goto peer_devlink_set_err; + } + devlink_register(devlink); return 0; +peer_devlink_set_err: + if (mlx5_dev_is_lightweight(sf_dev->mdev)) + mlx5_uninit_one_light(sf_dev->mdev); + else + mlx5_uninit_one(sf_dev->mdev); init_one_err: iounmap(mdev->iseg); remap_err: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index e34a8f88c518..964a5b1876f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -28,6 +28,7 @@ struct mlx5_sf_table { struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. 
*/ struct notifier_block esw_nb; struct notifier_block vhca_nb; + struct notifier_block mdev_nb; }; static struct mlx5_sf * @@ -511,6 +512,35 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi return 0; } +static int mlx5_sf_mdev_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, mdev_nb); + struct mlx5_sf_peer_devlink_event_ctx *event_ctx = data; + int ret = NOTIFY_DONE; + struct mlx5_sf *sf; + + if (event != MLX5_DRIVER_EVENT_SF_PEER_DEVLINK) + return NOTIFY_DONE; + + table = mlx5_sf_table_try_get(table->dev); + if (!table) + return NOTIFY_DONE; + + mutex_lock(&table->sf_state_lock); + sf = mlx5_sf_lookup_by_function_id(table, event_ctx->fn_id); + if (!sf) + goto out; + + event_ctx->err = devl_port_fn_devlink_set(&sf->dl_port.dl_port, + event_ctx->devlink); + + ret = NOTIFY_OK; +out: + mutex_unlock(&table->sf_state_lock); + mlx5_sf_table_put(table); + return ret; +} + static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev) { return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && @@ -544,6 +574,9 @@ int mlx5_sf_table_init(struct mlx5_core_dev *dev) if (err) goto vhca_err; + table->mdev_nb.notifier_call = mlx5_sf_mdev_event; + mlx5_blocking_notifier_register(dev, &table->mdev_nb); + return 0; vhca_err: @@ -562,6 +595,7 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) if (!table) return; + mlx5_blocking_notifier_unregister(dev, &table->mdev_nb); mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb); mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); WARN_ON(refcount_read(&table->refcount)); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4d5be378fa8c..8fbe22de16ef 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -366,6 +366,7 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_UPLINK_NETDEV, MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, 
MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, }; enum { -- cgit v1.2.3 From 86328b338c3996b814417dd68e3f899a1a649059 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Tue, 12 Sep 2023 13:59:49 +0530 Subject: vmcore: remove dependency with is_kdump_kernel() for exporting vmcore Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set. While elfcorehdr_addr is set for kexec based kernel dump mechanism, alternate dump capturing methods like fadump [1] also set it to export the vmcore. Since, is_kdump_kernel() is used to restrict resources in crash dump capture kernel and such restrictions may not be desirable for fadump, allow is_kdump_kernel() to be defined differently for such scenarios. With this, is_kdump_kernel() could be false while vmcore is usable. So, remove unnecessary dependency with is_kdump_kernel(), for exporting vmcore. [1] https://docs.kernel.org/powerpc/firmware-assisted-dump.html Suggested-by: Michael Ellerman Signed-off-by: Hari Bathini Acked-by: Baoquan He Signed-off-by: Michael Ellerman Link: https://msgid.link/20230912082950.856977-1-hbathini@linux.ibm.com --- include/linux/crash_dump.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 0f3a656293b0..acc55626afdc 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -50,6 +50,7 @@ void vmcore_cleanup(void); #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) #endif +#ifndef is_kdump_kernel /* * is_kdump_kernel() checks whether this kernel is booting after a panic of * previous kernel or not. This is determined by checking if previous kernel @@ -64,6 +65,7 @@ static inline bool is_kdump_kernel(void) { return elfcorehdr_addr != ELFCORE_ADDR_MAX; } +#endif /* is_vmcore_usable() checks if the kernel is booting after a panic and * the vmcore region is usable. 
@@ -75,7 +77,8 @@ static inline bool is_kdump_kernel(void) static inline int is_vmcore_usable(void) { - return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0; + return elfcorehdr_addr != ELFCORE_ADDR_ERR && + elfcorehdr_addr != ELFCORE_ADDR_MAX ? 1 : 0; } /* vmcore_unusable() marks the vmcore as unusable, @@ -84,8 +87,7 @@ static inline int is_vmcore_usable(void) static inline void vmcore_unusable(void) { - if (is_kdump_kernel()) - elfcorehdr_addr = ELFCORE_ADDR_ERR; + elfcorehdr_addr = ELFCORE_ADDR_ERR; } /** -- cgit v1.2.3 From fbaa6a181a4b1886cbf4214abdf9a2df68471510 Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Fri, 8 Sep 2023 15:49:15 -0700 Subject: sched/core: Remove ifdeffery for saved_state In preparation for freezer to also use saved_state, remove the CONFIG_PREEMPT_RT compilation guard around saved_state. On the arm64 platform I tested which did not have CONFIG_PREEMPT_RT, there was no statistically significant deviation by applying this patch. Test methodology: perf bench sched message -g 40 -l 40 Signed-off-by: Elliot Berman Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- kernel/sched/core.c | 8 ++------ 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..dc37ae787e33 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -750,10 +750,8 @@ struct task_struct { #endif unsigned int __state; -#ifdef CONFIG_PREEMPT_RT /* saved state for "spinlock sleepers" */ unsigned int saved_state; -#endif /* * This begins the randomizable portion of task_struct. 
Only diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f39482d6a6e6..49541e3c1295 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2232,23 +2232,20 @@ int __task_state_match(struct task_struct *p, unsigned int state) if (READ_ONCE(p->__state) & state) return 1; -#ifdef CONFIG_PREEMPT_RT if (READ_ONCE(p->saved_state) & state) return -1; -#endif + return 0; } static __always_inline int task_state_match(struct task_struct *p, unsigned int state) { -#ifdef CONFIG_PREEMPT_RT /* * Serialize against current_save_and_set_rtlock_wait_state() and * current_restore_rtlock_saved_state(). */ guard(raw_spinlock_irq)(&p->pi_lock); -#endif return __task_state_match(p, state); } @@ -4038,7 +4035,6 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) *success = !!(match = __task_state_match(p, state)); -#ifdef CONFIG_PREEMPT_RT /* * Saved state preserves the task state across blocking on * an RT lock. If the state matches, set p::saved_state to @@ -4054,7 +4050,7 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) */ if (match < 0) p->saved_state = TASK_RUNNING; -#endif + return match > 0; } -- cgit v1.2.3 From f7b5bd725b737de3f2c4a836e07c82ba156d75df Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 14 Sep 2023 15:31:57 -0700 Subject: pds_core: check health in devcmd wait Similar to what we do in the AdminQ, check for devcmd health while waiting for an answer. Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/pds_core/dev.c | 11 +++++++++-- include/linux/pds/pds_core_if.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index f77cd9f5a2fd..7c1b965d61a9 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -42,6 +42,8 @@ int pdsc_err_to_errno(enum pds_core_status_code code) return -ERANGE; case PDS_RC_BAD_ADDR: return -EFAULT; + case PDS_RC_BAD_PCI: + return -ENXIO; case PDS_RC_EOPCODE: case PDS_RC_EINTR: case PDS_RC_DEV_CMD: @@ -62,7 +64,7 @@ bool pdsc_is_fw_running(struct pdsc *pdsc) /* Firmware is useful only if the running bit is set and * fw_status != 0xff (bad PCI read) */ - return (pdsc->fw_status != 0xff) && + return (pdsc->fw_status != PDS_RC_BAD_PCI) && (pdsc->fw_status & PDS_CORE_FW_STS_F_RUNNING); } @@ -128,6 +130,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) unsigned long max_wait; unsigned long duration; int timeout = 0; + bool running; int done = 0; int err = 0; int status; @@ -136,6 +139,10 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) max_wait = start_time + (max_seconds * HZ); while (!done && !timeout) { + running = pdsc_is_fw_running(pdsc); + if (!running) + break; + done = pdsc_devcmd_done(pdsc); if (done) break; @@ -152,7 +159,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) dev_dbg(dev, "DEVCMD %d %s after %ld secs\n", opcode, pdsc_devcmd_str(opcode), duration / HZ); - if (!done || timeout) { + if ((!done || timeout) && running) { dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n", opcode, pdsc_devcmd_str(opcode), done, timeout, max_seconds); diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h index e838a2b90440..17a87c1a55d7 100644 --- a/include/linux/pds/pds_core_if.h +++ 
b/include/linux/pds/pds_core_if.h @@ -79,6 +79,7 @@ enum pds_core_status_code { PDS_RC_EVFID = 31, /* VF ID does not exist */ PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */ PDS_RC_ECLIENT = 33, /* No such client id */ + PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */ }; /** -- cgit v1.2.3 From b0af4bcb49464c221ad5f95d40f2b1b252ceedcc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Sep 2023 20:43:18 +0206 Subject: serial: core: Provide port lock wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a serial port is used for kernel console output, then all modifications to the UART registers which are done from other contexts, e.g. getty, termios, are interference points for the kernel console. So far this has been ignored and the printk output is based on the principle of hope. The rework of the console infrastructure which aims to support threaded and atomic consoles, requires to mark sections which modify the UART registers as unsafe. This allows the atomic write function to make informed decisions and eventually to restore operational state. It also allows to prevent the regular UART code from modifying UART registers while printk output is in progress. All modifications of UART registers are guarded by the UART port lock, which provides an obvious synchronization point with the console infrastructure. Provide wrapper functions for spin_[un]lock*(port->lock) invocations so that the console mechanics can be applied later on at a single place and does not require to copy the same logic all over the drivers. 
Signed-off-by: Thomas Gleixner Reviewed-by: Ilpo Järvinen Signed-off-by: John Ogness Link: https://lore.kernel.org/r/20230914183831.587273-2-john.ogness@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 79 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index bb6f073bc159..f1d5c0d1568c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -588,6 +588,85 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/** + * uart_port_lock - Lock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock(struct uart_port *up) +{ + spin_lock(&up->lock); +} + +/** + * uart_port_lock_irq - Lock the UART port and disable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_lock_irq(struct uart_port *up) +{ + spin_lock_irq(&up->lock); +} + +/** + * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + */ +static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/** + * uart_port_trylock - Try to lock the UART port + * @up: Pointer to UART port structure + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock(struct uart_port *up) +{ + return spin_trylock(&up->lock); +} + +/** + * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts + * @up: Pointer to UART port structure + * @flags: Pointer to interrupt flags storage + * + * Returns: True if lock was acquired, false otherwise + */ +static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) +{ + return spin_trylock_irqsave(&up->lock, *flags); +} + +/** + * 
uart_port_unlock - Unlock the UART port + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock(struct uart_port *up) +{ + spin_unlock(&up->lock); +} + +/** + * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts + * @up: Pointer to UART port structure + */ +static inline void uart_port_unlock_irq(struct uart_port *up) +{ + spin_unlock_irq(&up->lock); +} + +/** + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts + * @up: Pointer to UART port structure + * @flags: The saved interrupt flags for restore + */ +static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); -- cgit v1.2.3 From c5cbdb76e8e33ce90fec2946e8eee7d71d68e57a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Sep 2023 20:43:19 +0206 Subject: serial: core: Use lock wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a serial port is used for kernel console output, then all modifications to the UART registers which are done from other contexts, e.g. getty, termios, are interference points for the kernel console. So far this has been ignored and the printk output is based on the principle of hope. The rework of the console infrastructure which aims to support threaded and atomic consoles, requires to mark sections which modify the UART registers as unsafe. This allows the atomic write function to make informed decisions and eventually to restore operational state. It also allows to prevent the regular UART code from modifying UART registers while printk output is in progress. All modifications of UART registers are guarded by the UART port lock, which provides an obvious synchronization point with the console infrastructure.
To avoid adding this functionality to all UART drivers, wrap the spin_[un]lock*() invocations for uart_port::lock into helper functions which just contain the spin_[un]lock*() invocations for now. In a subsequent step these helpers will gain the console synchronization mechanisms. Converted with coccinelle. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ilpo Järvinen Signed-off-by: John Ogness Link: https://lore.kernel.org/r/20230914183831.587273-3-john.ogness@linutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index f1d5c0d1568c..3091c62ec37b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -1035,14 +1035,14 @@ static inline void uart_unlock_and_check_sysrq(struct uart_port *port) u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock(&port->lock); + uart_port_unlock(port); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock(&port->lock); + uart_port_unlock(port); if (sysrq_ch) handle_sysrq(sysrq_ch); @@ -1054,14 +1054,14 @@ static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port u8 sysrq_ch; if (!port->has_sysrq) { - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; - spin_unlock_irqrestore(&port->lock, flags); + uart_port_unlock_irqrestore(port, flags); if (sysrq_ch) handle_sysrq(sysrq_ch); @@ -1077,12 +1077,12 @@ static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { - spin_unlock(&port->lock); + uart_port_unlock(port); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { - spin_unlock_irqrestore(&port->lock, flags); + 
uart_port_unlock_irqrestore(port, flags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ -- cgit v1.2.3 From 1cb6422ecac8804ebe0b71f4b3440674955fec73 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:15:10 -0700 Subject: ceph: Annotate struct ceph_monmap with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ceph_monmap. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Ilya Dryomov Cc: Xiubo Li Cc: Jeff Layton Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: ceph-devel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Reviewed-by: Xiubo Li Signed-off-by: David S. 
Miller --- include/linux/ceph/mon_client.h | 2 +- net/ceph/mon_client.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index b658961156a0..7a9a40163c0f 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -19,7 +19,7 @@ struct ceph_monmap { struct ceph_fsid fsid; u32 epoch; u32 num_mon; - struct ceph_entity_inst mon_inst[]; + struct ceph_entity_inst mon_inst[] __counted_by(num_mon); }; struct ceph_mon_client; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index faabad6603db..f263f7e91a21 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1136,6 +1136,7 @@ static int build_initial_monmap(struct ceph_mon_client *monc) GFP_KERNEL); if (!monc->monmap) return -ENOMEM; + monc->monmap->num_mon = num_mon; for (i = 0; i < num_mon; i++) { struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i]; @@ -1147,7 +1148,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) inst->name.type = CEPH_ENTITY_TYPE_MON; inst->name.num = cpu_to_le64(i); } - monc->monmap->num_mon = num_mon; return 0; } -- cgit v1.2.3 From d57125b55a292a8e74a1fb17182576a3b2b2e795 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 18 Sep 2023 10:44:08 +0200 Subject: Revert "ceph: make members in struct ceph_mds_request_args_ext a union" This reverts commit 3af5ae22030cb59fab4fba35f5a2b62f47e14df9. ceph_mds_request_args_ext was already (and remains to be) a union. An additional anonymous union inside is bogus: union ceph_mds_request_args_ext { union { union ceph_mds_request_args old; struct { ... 
} __attribute__ ((packed)) setattr_ext; }; } Signed-off-by: Ilya Dryomov Reviewed-by: Xiubo Li --- include/linux/ceph/ceph_fs.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 5f2301ee88bc..f3b3593254b9 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -467,19 +467,17 @@ union ceph_mds_request_args { } __attribute__ ((packed)); union ceph_mds_request_args_ext { - union { - union ceph_mds_request_args old; - struct { - __le32 mode; - __le32 uid; - __le32 gid; - struct ceph_timespec mtime; - struct ceph_timespec atime; - __le64 size, old_size; /* old_size needed by truncate */ - __le32 mask; /* CEPH_SETATTR_* */ - struct ceph_timespec btime; - } __attribute__ ((packed)) setattr_ext; - }; + union ceph_mds_request_args old; + struct { + __le32 mode; + __le32 uid; + __le32 gid; + struct ceph_timespec mtime; + struct ceph_timespec atime; + __le64 size, old_size; /* old_size needed by truncate */ + __le32 mask; /* CEPH_SETATTR_* */ + struct ceph_timespec btime; + } __attribute__ ((packed)) setattr_ext; }; #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ -- cgit v1.2.3 From 8452a05b2c633b708dbe3e742f71b24bf21fe42d Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Sat, 16 Sep 2023 14:33:12 +0800 Subject: net: stmmac: Tx coe sw fallback Add sw fallback of tx checksum calculation for those tx queues that don't support tx checksum offloading. DW xGMAC IP can be synthesized such that it can support tx checksum offloading only for a few initial tx queues. Also as Serge pointed out, for the DW QoS IP, tx coe can be individually configured for each tx queue. So when tx coe is enabled, for any tx queue that doesn't support tx coe with 'coe-unsupported' flag set will have a sw fallback happen in the driver for tx checksum calculation when any packets to be transmitted on these tx queues. 
Signed-off-by: Rohan G Thomas Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++++++++++ drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 +++ include/linux/stmmac.h | 1 + 3 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2206789802bf..9201ed778ebc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4401,6 +4401,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) WARN_ON(tx_q->tx_skbuff[first_entry]); csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL); + /* DWMAC IPs can be synthesized to support tx coe only for a few tx + * queues. In that case, checksum offloading for those queues that don't + * support tx coe needs to fallback to software checksum calculation. + */ + if (csum_insertion && + priv->plat->tx_queues_cfg[queue].coe_unsupported) { + if (unlikely(skb_checksum_help(skb))) + goto dma_map_err; + csum_insertion = !csum_insertion; + } if (likely(priv->extend_desc)) desc = (struct dma_desc *)(tx_q->dma_etx + entry); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 0f28795e581c..a09014c9e7d0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -276,6 +276,9 @@ static int stmmac_mtl_setup(struct platform_device *pdev, plat->tx_queues_cfg[queue].use_prio = true; } + plat->tx_queues_cfg[queue].coe_unsupported = + of_property_read_bool(q_node, "snps,coe-unsupported"); + queue++; } if (queue != plat->tx_queues_to_use) { diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index ce89cc3e4913..c0079a7574ae 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -139,6 +139,7 @@ struct stmmac_rxq_cfg { struct 
stmmac_txq_cfg { u32 weight; + bool coe_unsupported; u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; -- cgit v1.2.3 From 6b93bb41f6eaa1cc5c5f30ec3b687b380f116cd0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:00 +0206 Subject: printk: Add non-BKL (nbcon) console basic infrastructure The current console/printk subsystem is protected by a Big Kernel Lock, (aka console_lock) which has ill-defined semantics and is more or less stateless. This puts severe limitations on the console subsystem and makes forced takeover and output in emergency and panic situations a fragile endeavour that is based on try and pray. The goal of non-BKL (nbcon) consoles is to break out of the console lock jail and to provide a new infrastructure that avoids the pitfalls and also allows console drivers to be gradually converted over. The proposed infrastructure aims for the following properties: - Per console locking instead of global locking - Per console state that allows to make informed decisions - Stateful handover and takeover As a first step, state is added to struct console. The per console state is an atomic_t using a 32bit bit field. Reserve state bits, which will be populated later in the series. Wire it up into the console register/unregister functionality. It was decided to use a bitfield because using a plain u32 with mask/shift operations resulted in incomprehensible code.
Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-2-john.ogness@linutronix.de --- include/linux/console.h | 31 +++++++++++++++++++++ kernel/printk/Makefile | 2 +- kernel/printk/internal.h | 8 ++++++ kernel/printk/nbcon.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 13 ++++++--- 5 files changed, 120 insertions(+), 4 deletions(-) create mode 100644 kernel/printk/nbcon.c (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 7de11c763eb3..a2d37a7a98a8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -156,6 +156,8 @@ static inline int con_debug_leave(void) * /dev/kmesg which requires a larger output buffer. * @CON_SUSPENDED: Indicates if a console is suspended. If true, the * printing callbacks must not be called. + * @CON_NBCON: Console can operate outside of the legacy style console_lock + * constraints. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), @@ -166,8 +168,32 @@ enum cons_flags { CON_BRL = BIT(5), CON_EXTENDED = BIT(6), CON_SUSPENDED = BIT(7), + CON_NBCON = BIT(8), }; +/** + * struct nbcon_state - console state for nbcon consoles + * @atom: Compound of the state fields for atomic operations + * + * To be used for reading and preparing of the value stored in the nbcon + * state variable @console::nbcon_state. + */ +struct nbcon_state { + union { + unsigned int atom; + struct { + }; + }; +}; + +/* + * The nbcon_state struct is used to easily create and interpret values that + * are stored in the @console::nbcon_state variable. Ensure this struct stays + * within the size boundaries of the atomic variable's underlying type in + * order to avoid any accidental truncation. 
+ */ +static_assert(sizeof(struct nbcon_state) <= sizeof(int)); + /** * struct console - The console descriptor structure * @name: The name of the console driver @@ -187,6 +213,8 @@ enum cons_flags { * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list + * + * @nbcon_state: State for nbcon consoles */ struct console { char name[16]; @@ -206,6 +234,9 @@ struct console { unsigned long dropped; void *data; struct hlist_node node; + + /* nbcon console specific members */ + atomic_t __private nbcon_state; }; #ifdef CONFIG_LOCKDEP diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index f5b388e810b9..39a2b61c7232 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y = printk.o -obj-$(CONFIG_PRINTK) += printk_safe.o +obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o obj-$(CONFIG_PRINTK_INDEX) += index.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7d4979d5c3ce..2ca0ab78802c 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -3,6 +3,7 @@ * internal.h - printk internal definitions */ #include +#include #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) void __init printk_sysctl_init(void); @@ -61,6 +62,10 @@ void defer_console_output(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); + +void nbcon_init(struct console *con); +void nbcon_cleanup(struct console *con); + #else #define PRINTK_PREFIX_MAX 0 @@ -76,6 +81,9 @@ u16 printk_parse_prefix(const char *text, int *level, #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline void nbcon_init(struct console *con) { } +static inline void nbcon_cleanup(struct console *con) { } + #endif /* CONFIG_PRINTK */ /** diff --git a/kernel/printk/nbcon.c 
b/kernel/printk/nbcon.c new file mode 100644 index 000000000000..63d24ca62ac5 --- /dev/null +++ b/kernel/printk/nbcon.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2022 Linutronix GmbH, John Ogness +// Copyright (C) 2022 Intel, Thomas Gleixner + +#include +#include +#include "internal.h" +/* + * Printk console printing implementation for consoles which do not depend + * on the legacy style console_lock mechanism. + */ + +/** + * nbcon_state_set - Helper function to set the console state + * @con: Console to update + * @new: The new state to write + * + * Only to be used when the console is not yet or no longer visible in the + * system. Otherwise use nbcon_state_try_cmpxchg(). + */ +static inline void nbcon_state_set(struct console *con, struct nbcon_state *new) +{ + atomic_set(&ACCESS_PRIVATE(con, nbcon_state), new->atom); +} + +/** + * nbcon_state_read - Helper function to read the console state + * @con: Console to read + * @state: The state to store the result + */ +static inline void nbcon_state_read(struct console *con, struct nbcon_state *state) +{ + state->atom = atomic_read(&ACCESS_PRIVATE(con, nbcon_state)); +} + +/** + * nbcon_state_try_cmpxchg() - Helper function for atomic_try_cmpxchg() on console state + * @con: Console to update + * @cur: Old/expected state + * @new: New state + * + * Return: True on success. False on fail and @cur is updated.
+ */ +static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_state *cur, + struct nbcon_state *new) +{ + return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); +} + +/** + * nbcon_init - Initialize the nbcon console specific data + * @con: Console to initialize + */ +void nbcon_init(struct console *con) +{ + struct nbcon_state state = { }; + + nbcon_state_set(con, &state); +} + +/** + * nbcon_cleanup - Cleanup the nbcon console specific data + * @con: Console to cleanup + */ +void nbcon_cleanup(struct console *con) +{ + struct nbcon_state state = { }; + + nbcon_state_set(con, &state); +} diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7e0b4dd02398..5f372eaceb29 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3326,9 +3326,10 @@ static void try_enable_default_console(struct console *newcon) newcon->flags |= CON_CONSDEV; } -#define con_printk(lvl, con, fmt, ...) \ - printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ - (con->flags & CON_BOOT) ? "boot" : "", \ +#define con_printk(lvl, con, fmt, ...) \ + printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ + (con->flags & CON_NBCON) ? "" : "legacy ", \ + (con->flags & CON_BOOT) ? "boot" : "", \ con->name, con->index, ##__VA_ARGS__) static void console_init_seq(struct console *newcon, bool bootcon_registered) @@ -3488,6 +3489,9 @@ void register_console(struct console *newcon) newcon->dropped = 0; console_init_seq(newcon, bootcon_registered); + if (newcon->flags & CON_NBCON) + nbcon_init(newcon); + /* * Put this console in the list - keep the * preferred driver at the head of the list. 
@@ -3579,6 +3583,9 @@ static int unregister_console_locked(struct console *console) */ synchronize_srcu(&console_srcu); + if (console->flags & CON_NBCON) + nbcon_cleanup(console); + console_sysfs_notify(); if (console->exit) -- cgit v1.2.3 From 3a5bb25162b880da749f7cdf281c78dbade4164b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:01 +0206 Subject: printk: nbcon: Add acquire/release logic Add per console acquire/release functionality. The state of the console is maintained in the "nbcon_state" atomic variable. The console is locked when: - The 'prio' field contains the priority of the context that owns the console. Only higher priority contexts are allowed to take over the lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. - The 'cpu' field denotes on which CPU the console is locked. It is used to prevent busy waiting on the same CPU. Also it informs the lock owner that it has lost the lock in a more complex scenario when the lock was taken over by a higher priority context, released, and taken on another CPU with the same priority as the interrupted owner. The acquire mechanism uses a few more fields: - The 'req_prio' field is used by the handover approach to make the current owner aware that there is a context with a higher priority waiting for the friendly handover. - The 'unsafe' field allows to take over the console in a safe way in the middle of emitting a message. The field is set only when accessing some shared resources or when the console device is manipulated. It can be cleared, for example, after emitting one character when the console device is in a consistent state. - The 'unsafe_takeover' field is set when a hostile takeover took the console in an unsafe state. The console will stay in the unsafe state until re-initialized. The acquire mechanism uses three approaches: 1) Direct acquire when the console is not owned or is owned by a lower priority context and is in a safe state. 
2) Friendly handover mechanism uses a request/grant handshake. It is used when the current owner has lower priority and the console is in an unsafe state. The requesting context: a) Sets its priority into the 'req_prio' field. b) Waits (with a timeout) for the owning context to unlock the console. c) Takes the lock and clears the 'req_prio' field. The owning context: a) Observes the 'req_prio' field set on exit from the unsafe console state. b) Gives up console ownership by clearing the 'prio' field. 3) Unsafe hostile takeover allows taking over the lock even when the console is in an unsafe state. It is used only in panic() by the final attempt to flush consoles in a try-and-hope mode. Note that separate record buffers are used in panic(). As a result, the messages can be read and formatted without any risk even after using the hostile takeover in an unsafe state. The release function simply clears the 'prio' field. All operations on @console::nbcon_state are atomic cmpxchg based to handle concurrency. The acquire/release functions implement only minimal policies: - Preference for higher priority contexts. - Protection of the panic CPU. All other policy decisions must be made at the call sites: - What is marked as an unsafe section. - Whether to spin-wait if there is already an owner and the console is in an unsafe state. - Whether to attempt an unsafe hostile takeover. The design allows implementing the well-known pattern: acquire() output_one_printk_record() release() The output of one printk record might be interrupted by a higher priority context. The new owner is supposed to reprint the entire interrupted record from scratch.
Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-3-john.ogness@linutronix.de --- include/linux/console.h | 56 ++++++ kernel/printk/nbcon.c | 497 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 553 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index a2d37a7a98a8..98210fd01f18 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -175,13 +175,29 @@ enum cons_flags { * struct nbcon_state - console state for nbcon consoles * @atom: Compound of the state fields for atomic operations * + * @req_prio: The priority of a handover request + * @prio: The priority of the current owner + * @unsafe: Console is busy in a non takeover region + * @unsafe_takeover: A hostile takeover in an unsafe state happened in the + * past. The console cannot be safe until re-initialized. + * @cpu: The CPU on which the owner runs + * * To be used for reading and preparing of the value stored in the nbcon * state variable @console::nbcon_state. + * + * The @prio and @req_prio fields are particularly important to allow + * spin-waiting to timeout and give up without the risk of a waiter being + * assigned the lock after giving up. */ struct nbcon_state { union { unsigned int atom; struct { + unsigned int prio : 2; + unsigned int req_prio : 2; + unsigned int unsafe : 1; + unsigned int unsafe_takeover : 1; + unsigned int cpu : 24; }; }; }; @@ -194,6 +210,46 @@ struct nbcon_state { */ static_assert(sizeof(struct nbcon_state) <= sizeof(int)); +/** + * nbcon_prio - console owner priority for nbcon consoles + * @NBCON_PRIO_NONE: Unused + * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage + * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) 
+ * @NBCON_PRIO_PANIC: Panic output + * @NBCON_PRIO_MAX: The number of priority levels + * + * A higher priority context can takeover the console when it is + * in the safe state. The final attempt to flush consoles in panic() + * can be allowed to do so even in an unsafe state (Hope and pray). + */ +enum nbcon_prio { + NBCON_PRIO_NONE = 0, + NBCON_PRIO_NORMAL, + NBCON_PRIO_EMERGENCY, + NBCON_PRIO_PANIC, + NBCON_PRIO_MAX, +}; + +struct console; + +/** + * struct nbcon_context - Context for console acquire/release + * @console: The associated console + * @spinwait_max_us: Limit for spin-wait acquire + * @prio: Priority of the context + * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can + * be used only with NBCON_PRIO_PANIC @prio. It + * might cause a system freeze when the console + * is used later. + */ +struct nbcon_context { + /* members set by caller */ + struct console *console; + unsigned int spinwait_max_us; + enum nbcon_prio prio; + unsigned int allow_unsafe_takeover : 1; +}; + /** * struct console - The console descriptor structure * @name: The name of the console driver diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 63d24ca62ac5..a2a354f859f9 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -4,10 +4,98 @@ #include #include +#include #include "internal.h" /* * Printk console printing implementation for consoles which does not depend * on the legacy style console_lock mechanism. + * + * The state of the console is maintained in the "nbcon_state" atomic + * variable. + * + * The console is locked when: + * + * - The 'prio' field contains the priority of the context that owns the + * console. Only higher priority contexts are allowed to take over the + * lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. + * + * - The 'cpu' field denotes on which CPU the console is locked. It is used + * to prevent busy waiting on the same CPU. 
Also it informs the lock owner + * that it has lost the lock in a more complex scenario when the lock was + * taken over by a higher priority context, released, and taken on another + * CPU with the same priority as the interrupted owner. + * + * The acquire mechanism uses a few more fields: + * + * - The 'req_prio' field is used by the handover approach to make the + * current owner aware that there is a context with a higher priority + * waiting for the friendly handover. + * + * - The 'unsafe' field allows to take over the console in a safe way in the + * middle of emitting a message. The field is set only when accessing some + * shared resources or when the console device is manipulated. It can be + * cleared, for example, after emitting one character when the console + * device is in a consistent state. + * + * - The 'unsafe_takeover' field is set when a hostile takeover took the + * console in an unsafe state. The console will stay in the unsafe state + * until re-initialized. + * + * The acquire mechanism uses three approaches: + * + * 1) Direct acquire when the console is not owned or is owned by a lower + * priority context and is in a safe state. + * + * 2) Friendly handover mechanism uses a request/grant handshake. It is used + * when the current owner has lower priority and the console is in an + * unsafe state. + * + * The requesting context: + * + * a) Sets its priority into the 'req_prio' field. + * + * b) Waits (with a timeout) for the owning context to unlock the + * console. + * + * c) Takes the lock and clears the 'req_prio' field. + * + * The owning context: + * + * a) Observes the 'req_prio' field set on exit from the unsafe + * console state. + * + * b) Gives up console ownership by clearing the 'prio' field. + * + * 3) Unsafe hostile takeover allows to take over the lock even when the + * console is an unsafe state. It is used only in panic() by the final + * attempt to flush consoles in a try and hope mode. 
+ * + * The release function simply clears the 'prio' field. + * + * All operations on @console::nbcon_state are atomic cmpxchg based to + * handle concurrency. + * + * The acquire/release functions implement only minimal policies: + * + * - Preference for higher priority contexts. + * - Protection of the panic CPU. + * + * All other policy decisions must be made at the call sites: + * + * - What is marked as an unsafe section. + * - Whether to spin-wait if there is already an owner and the console is + * in an unsafe state. + * - Whether to attempt an unsafe hostile takeover. + * + * The design allows to implement the well known: + * + * acquire() + * output_one_printk_record() + * release() + * + * The output of one printk record might be interrupted with a higher priority + * context. The new owner is supposed to reprint the entire interrupted record + * from scratch. */ /** @@ -47,6 +135,415 @@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); } +/** + * nbcon_context_try_acquire_direct - Try to acquire directly + * @ctxt: The context of the caller + * @cur: The current console state + * + * Acquire the console when it is released. Also acquire the console when + * the current owner has a lower priority and the console is in a safe state. + * + * Return: 0 on success. Otherwise, an error code on failure. Also @cur + * is updated to the latest state when failed to modify it. + * + * Errors: + * + * -EPERM: A panic is in progress and this is not the panic CPU. + * Or the current owner or waiter has the same or higher + * priority. No acquire method can be successful in + * this case. + * + * -EBUSY: The current owner has a lower priority but the console + * in an unsafe state. The caller should try using + * the handover acquire method. 
+ */ +static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, + struct nbcon_state *cur) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state new; + + do { + if (other_cpu_in_panic()) + return -EPERM; + + if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio) + return -EPERM; + + if (cur->unsafe) + return -EBUSY; + + /* + * The console should never be safe for a direct acquire + * if an unsafe hostile takeover has ever happened. + */ + WARN_ON_ONCE(cur->unsafe_takeover); + + new.atom = cur->atom; + new.prio = ctxt->prio; + new.req_prio = NBCON_PRIO_NONE; + new.unsafe = cur->unsafe_takeover; + new.cpu = cpu; + + } while (!nbcon_state_try_cmpxchg(con, cur, &new)); + + return 0; +} + +static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) +{ + /* + * The request context is well defined by the @req_prio because: + * + * - Only a context with a higher priority can take over the request. + * - There are only three priorities. + * - Only one CPU is allowed to request PANIC priority. + * - Lower priorities are ignored during panic() until reboot. + * + * As a result, the following scenario is *not* possible: + * + * 1. Another context with a higher priority directly takes ownership. + * 2. The higher priority context releases the ownership. + * 3. A lower priority context takes the ownership. + * 4. Another context with the same priority as this context + * creates a request and starts waiting. + */ + + return (cur->req_prio == expected_prio); +} + +/** + * nbcon_context_try_acquire_requested - Try to acquire after having + * requested a handover + * @ctxt: The context of the caller + * @cur: The current console state + * + * This is a helper function for nbcon_context_try_acquire_handover(). + * It is called when the console is in an unsafe state. The current + * owner will release the console on exit from the unsafe region. 
+ * + * Return: 0 on success and @cur is updated to the new console state. + * Otherwise an error code on failure. + * + * Errors: + * + * -EPERM: A panic is in progress and this is not the panic CPU + * or this context is no longer the waiter. + * + * -EBUSY: The console is still locked. The caller should + * continue waiting. + * + * Note: The caller must still remove the request when an error has occurred + * except when this context is no longer the waiter. + */ +static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt, + struct nbcon_state *cur) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state new; + + /* Note that the caller must still remove the request! */ + if (other_cpu_in_panic()) + return -EPERM; + + /* + * Note that the waiter will also change if there was an unsafe + * hostile takeover. + */ + if (!nbcon_waiter_matches(cur, ctxt->prio)) + return -EPERM; + + /* If still locked, caller should continue waiting. */ + if (cur->prio != NBCON_PRIO_NONE) + return -EBUSY; + + /* + * The previous owner should have never released ownership + * in an unsafe region. + */ + WARN_ON_ONCE(cur->unsafe); + + new.atom = cur->atom; + new.prio = ctxt->prio; + new.req_prio = NBCON_PRIO_NONE; + new.unsafe = cur->unsafe_takeover; + new.cpu = cpu; + + if (!nbcon_state_try_cmpxchg(con, cur, &new)) { + /* + * The acquire could fail only when it has been taken + * over by a higher priority context. + */ + WARN_ON_ONCE(nbcon_waiter_matches(cur, ctxt->prio)); + return -EPERM; + } + + /* Handover success. This context now owns the console. */ + return 0; +} + +/** + * nbcon_context_try_acquire_handover - Try to acquire via handover + * @ctxt: The context of the caller + * @cur: The current console state + * + * The function must be called only when the context has higher priority + * than the current owner and the console is in an unsafe state. 
+ * It is the case when nbcon_context_try_acquire_direct() returns -EBUSY. + * + * The function sets "req_prio" field to make the current owner aware of + * the request. Then it waits until the current owner releases the console, + * or an even higher context takes over the request, or timeout expires. + * + * The current owner checks the "req_prio" field on exit from the unsafe + * region and releases the console. It does not touch the "req_prio" field + * so that the console stays reserved for the waiter. + * + * Return: 0 on success. Otherwise, an error code on failure. Also @cur + * is updated to the latest state when failed to modify it. + * + * Errors: + * + * -EPERM: A panic is in progress and this is not the panic CPU. + * Or a higher priority context has taken over the + * console or the handover request. + * + * -EBUSY: The current owner is on the same CPU so that the hand + * shake could not work. Or the current owner is not + * willing to wait (zero timeout). Or the console does + * not enter the safe state before timeout passed. The + * caller might still use the unsafe hostile takeover + * when allowed. + * + * -EAGAIN: @cur has changed when creating the handover request. + * The caller should retry with direct acquire. + */ +static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt, + struct nbcon_state *cur) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state new; + int timeout; + int request_err = -EBUSY; + + /* + * Check that the handover is called when the direct acquire failed + * with -EBUSY. + */ + WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); + WARN_ON_ONCE(!cur->unsafe); + + /* Handover is not possible on the same CPU. */ + if (cur->cpu == cpu) + return -EBUSY; + + /* + * Console stays unsafe after an unsafe takeover until re-initialized. + * Waiting is not going to help in this case. 
+ */ + if (cur->unsafe_takeover) + return -EBUSY; + + /* Is the caller willing to wait? */ + if (ctxt->spinwait_max_us == 0) + return -EBUSY; + + /* + * Setup a request for the handover. The caller should try to acquire + * the console directly when the current state has been modified. + */ + new.atom = cur->atom; + new.req_prio = ctxt->prio; + if (!nbcon_state_try_cmpxchg(con, cur, &new)) + return -EAGAIN; + + cur->atom = new.atom; + + /* Wait until there is no owner and then acquire the console. */ + for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) { + /* On successful acquire, this request is cleared. */ + request_err = nbcon_context_try_acquire_requested(ctxt, cur); + if (!request_err) + return 0; + + /* + * If the acquire should be aborted, it must be ensured + * that the request is removed before returning to caller. + */ + if (request_err == -EPERM) + break; + + udelay(1); + + /* Re-read the state because some time has passed. */ + nbcon_state_read(con, cur); + } + + /* Timed out or aborted. Carefully remove handover request. */ + do { + /* + * No need to remove request if there is a new waiter. This + * can only happen if a higher priority context has taken over + * the console or the handover request. + */ + if (!nbcon_waiter_matches(cur, ctxt->prio)) + return -EPERM; + + /* Unset request for handover. */ + new.atom = cur->atom; + new.req_prio = NBCON_PRIO_NONE; + if (nbcon_state_try_cmpxchg(con, cur, &new)) { + /* + * Request successfully unset. Report failure of + * acquiring via handover. + */ + cur->atom = new.atom; + return request_err; + } + + /* + * Unable to remove request. Try to acquire in case + * the owner has released the lock. + */ + } while (nbcon_context_try_acquire_requested(ctxt, cur)); + + /* Lucky timing. The acquire succeeded while removing the request. 
*/ + return 0; +} + +/** + * nbcon_context_try_acquire_hostile - Acquire via unsafe hostile takeover + * @ctxt: The context of the caller + * @cur: The current console state + * + * Acquire the console even in the unsafe state. + * + * It can be permitted by setting the 'allow_unsafe_takeover' field only + * by the final attempt to flush messages in panic(). + * + * Return: 0 on success. -EPERM when not allowed by the context. + */ +static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, + struct nbcon_state *cur) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state new; + + if (!ctxt->allow_unsafe_takeover) + return -EPERM; + + /* Ensure caller is allowed to perform unsafe hostile takeovers. */ + if (WARN_ON_ONCE(ctxt->prio != NBCON_PRIO_PANIC)) + return -EPERM; + + /* + * Check that try_acquire_direct() and try_acquire_handover() returned + * -EBUSY in the right situation. + */ + WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); + WARN_ON_ONCE(cur->unsafe != true); + + do { + new.atom = cur->atom; + new.cpu = cpu; + new.prio = ctxt->prio; + new.unsafe |= cur->unsafe_takeover; + new.unsafe_takeover |= cur->unsafe; + + } while (!nbcon_state_try_cmpxchg(con, cur, &new)); + + return 0; +} + +/** + * nbcon_context_try_acquire - Try to acquire nbcon console + * @ctxt: The context of the caller + * + * Return: True if the console was acquired. False otherwise. + * + * If the caller allowed an unsafe hostile takeover, on success the + * caller should check the current console state to see if it is + * in an unsafe state. Otherwise, on success the caller may assume + * the console is not in an unsafe state. 
+ */ +__maybe_unused +static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) +{ + struct console *con = ctxt->console; + struct nbcon_state cur; + int err; + + nbcon_state_read(con, &cur); +try_again: + err = nbcon_context_try_acquire_direct(ctxt, &cur); + if (err != -EBUSY) + goto out; + + err = nbcon_context_try_acquire_handover(ctxt, &cur); + if (err == -EAGAIN) + goto try_again; + if (err != -EBUSY) + goto out; + + err = nbcon_context_try_acquire_hostile(ctxt, &cur); +out: + return !err; +} + +static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, + int expected_prio) +{ + /* + * Since consoles can only be acquired by higher priorities, + * owning contexts are uniquely identified by @prio. However, + * since contexts can unexpectedly lose ownership, it is + * possible that later another owner appears with the same + * priority. For this reason @cpu is also needed. + */ + + if (cur->prio != expected_prio) + return false; + + if (cur->cpu != expected_cpu) + return false; + + return true; +} + +/** + * nbcon_context_release - Release the console + * @ctxt: The nbcon context from nbcon_context_try_acquire() + */ +__maybe_unused +static void nbcon_context_release(struct nbcon_context *ctxt) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state cur; + struct nbcon_state new; + + nbcon_state_read(con, &cur); + + do { + if (!nbcon_owner_matches(&cur, cpu, ctxt->prio)) + return; + + new.atom = cur.atom; + new.prio = NBCON_PRIO_NONE; + + /* + * If @unsafe_takeover is set, it is kept set so that + * the state remains permanently unsafe. 
+ */ + new.unsafe |= cur.unsafe_takeover; + + } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); +} + /** * nbcon_init - Initialize the nbcon console specific data * @con: Console to initialize -- cgit v1.2.3 From 5634c90fd8553de7cbafecd048d0273690a2e84e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:03 +0206 Subject: printk: nbcon: Add buffer management In case of hostile takeovers it must be ensured that the previous owner cannot scribble over the output buffer of the emergency/panic context. This is achieved by: - Adding a global output buffer instance for the panic context. This is the only situation where hostile takeovers can occur and there is always at most 1 panic context. - Allocating an output buffer per non-boot console upon console registration. This buffer is used by the console owner when not in panic context. (For boot consoles, the existing shared global legacy output buffer is used instead. Boot console printing will be synchronized with legacy console printing.) - Choosing the appropriate buffer is handled in the acquire/release functions. 
Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-5-john.ogness@linutronix.de --- include/linux/console.h | 7 +++++ kernel/printk/internal.h | 12 ++++++-- kernel/printk/nbcon.c | 73 ++++++++++++++++++++++++++++++++++++++++++++---- kernel/printk/printk.c | 22 +++++++++------ 4 files changed, 99 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 98210fd01f18..ca1ef8700e55 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -231,6 +231,7 @@ enum nbcon_prio { }; struct console; +struct printk_buffers; /** * struct nbcon_context - Context for console acquire/release @@ -241,6 +242,7 @@ struct console; * be used only with NBCON_PRIO_PANIC @prio. It * might cause a system freeze when the console * is used later. + * @pbufs: Pointer to the text buffer for this context */ struct nbcon_context { /* members set by caller */ @@ -248,6 +250,9 @@ struct nbcon_context { unsigned int spinwait_max_us; enum nbcon_prio prio; unsigned int allow_unsafe_takeover : 1; + + /* members set by acquire */ + struct printk_buffers *pbufs; }; /** @@ -271,6 +276,7 @@ struct nbcon_context { * @node: hlist node for the console list * * @nbcon_state: State for nbcon consoles + * @pbufs: Pointer to nbcon private buffer */ struct console { char name[16]; @@ -293,6 +299,7 @@ struct console { /* nbcon console specific members */ atomic_t __private nbcon_state; + struct printk_buffers *pbufs; }; #ifdef CONFIG_LOCKDEP diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7199d60bfc25..f6161cd75d7d 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -13,6 +13,12 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, #define printk_sysctl_init() do { } while (0) #endif +#define con_printk(lvl, con, fmt, ...) 
\ + printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ + (con->flags & CON_NBCON) ? "" : "legacy ", \ + (con->flags & CON_BOOT) ? "boot" : "", \ + con->name, con->index, ##__VA_ARGS__) + #ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK_CALLER @@ -63,8 +69,9 @@ void defer_console_output(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); +bool nbcon_alloc(struct console *con); void nbcon_init(struct console *con); -void nbcon_cleanup(struct console *con); +void nbcon_free(struct console *con); #else @@ -81,8 +88,9 @@ void nbcon_cleanup(struct console *con); #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_init(struct console *con) { } -static inline void nbcon_cleanup(struct console *con) { } +static inline void nbcon_free(struct console *con) { } #endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index a2a354f859f9..ba1febf15db6 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "internal.h" /* * Printk console printing implementation for consoles which does not depend @@ -70,6 +71,10 @@ * console is an unsafe state. It is used only in panic() by the final * attempt to flush consoles in a try and hope mode. * + * Note that separate record buffers are used in panic(). As a result, + * the messages can be read and formatted without any risk even after + * using the hostile takeover in unsafe state. + * * The release function simply clears the 'prio' field. 
* * All operations on @console::nbcon_state are atomic cmpxchg based to @@ -459,6 +464,8 @@ static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, return 0; } +static struct printk_buffers panic_nbcon_pbufs; + /** * nbcon_context_try_acquire - Try to acquire nbcon console * @ctxt: The context of the caller @@ -473,6 +480,7 @@ static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, __maybe_unused static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) { + unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state cur; int err; @@ -491,7 +499,18 @@ try_again: err = nbcon_context_try_acquire_hostile(ctxt, &cur); out: - return !err; + if (err) + return false; + + /* Acquire succeeded. */ + + /* Assign the appropriate buffer for this context. */ + if (atomic_read(&panic_cpu) == cpu) + ctxt->pbufs = &panic_nbcon_pbufs; + else + ctxt->pbufs = con->pbufs; + + return true; } static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, @@ -530,7 +549,7 @@ static void nbcon_context_release(struct nbcon_context *ctxt) do { if (!nbcon_owner_matches(&cur, cpu, ctxt->prio)) - return; + break; new.atom = cur.atom; new.prio = NBCON_PRIO_NONE; @@ -542,26 +561,70 @@ static void nbcon_context_release(struct nbcon_context *ctxt) new.unsafe |= cur.unsafe_takeover; } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); + + ctxt->pbufs = NULL; +} + +/** + * nbcon_alloc - Allocate buffers needed by the nbcon console + * @con: Console to allocate buffers for + * + * Return: True on success. False otherwise and the console cannot + * be used. + * + * This is not part of nbcon_init() because buffer allocation must + * be performed earlier in the console registration process. + */ +bool nbcon_alloc(struct console *con) +{ + if (con->flags & CON_BOOT) { + /* + * Boot console printing is synchronized with legacy console + * printing, so boot consoles can share the same global printk + * buffers. 
+ */ + con->pbufs = &printk_shared_pbufs; + } else { + con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); + if (!con->pbufs) { + con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); + return false; + } + } + + return true; } /** * nbcon_init - Initialize the nbcon console specific data * @con: Console to initialize + * + * nbcon_alloc() *must* be called and succeed before this function + * is called. */ void nbcon_init(struct console *con) { struct nbcon_state state = { }; + /* nbcon_alloc() must have been called and successful! */ + BUG_ON(!con->pbufs); + nbcon_state_set(con, &state); } /** - * nbcon_cleanup - Cleanup the nbcon console specific data - * @con: Console to cleanup + * nbcon_free - Free and cleanup the nbcon console specific data + * @con: Console to free/cleanup nbcon data */ -void nbcon_cleanup(struct console *con) +void nbcon_free(struct console *con) { struct nbcon_state state = { }; nbcon_state_set(con, &state); + + /* Boot consoles share global printk buffers. */ + if (!(con->flags & CON_BOOT)) + kfree(con->pbufs); + + con->pbufs = NULL; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 17def3791bc0..1c9720acd960 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3331,12 +3331,6 @@ static void try_enable_default_console(struct console *newcon) newcon->flags |= CON_CONSDEV; } -#define con_printk(lvl, con, fmt, ...) \ - printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ - (con->flags & CON_NBCON) ? "" : "legacy ", \ - (con->flags & CON_BOOT) ? "boot" : "", \ - con->name, con->index, ##__VA_ARGS__) - static void console_init_seq(struct console *newcon, bool bootcon_registered) { struct console *con; @@ -3450,6 +3444,15 @@ void register_console(struct console *newcon) goto unlock; } + if (newcon->flags & CON_NBCON) { + /* + * Ensure the nbcon console buffers can be allocated + * before modifying any global data. 
+ */ + if (!nbcon_alloc(newcon)) + goto unlock; + } + /* * See if we want to enable this console driver by default. * @@ -3477,8 +3480,11 @@ void register_console(struct console *newcon) err = try_enable_preferred_console(newcon, false); /* printk() messages are not printed to the Braille console. */ - if (err || newcon->flags & CON_BRL) + if (err || newcon->flags & CON_BRL) { + if (newcon->flags & CON_NBCON) + nbcon_free(newcon); goto unlock; + } /* * If we have a bootconsole, and are switching to a real console, @@ -3589,7 +3595,7 @@ static int unregister_console_locked(struct console *console) synchronize_srcu(&console_srcu); if (console->flags & CON_NBCON) - nbcon_cleanup(console); + nbcon_free(console); console_sysfs_notify(); -- cgit v1.2.3 From ad56ebd1d79b216dc147474fac89a11daf6b10df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:05 +0206 Subject: printk: nbcon: Add sequence handling Add an atomic_long_t field @nbcon_seq to the console struct to store the sequence number for nbcon consoles. For nbcon consoles this will be used instead of the non-atomic @seq field. The new field allows for safe atomic sequence number updates without requiring any locking. On 64bit systems the new field stores the full sequence number. On 32bit systems the new field stores the lower 32 bits of the sequence number, which are expanded to 64bit as needed by folding the values based on the sequence numbers available in the ringbuffer. For 32bit systems, having a 32bit representation in the console is sufficient. If a console ever gets more than 2^31 records behind the ringbuffer then this is the least of the problems. 
Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-7-john.ogness@linutronix.de --- include/linux/console.h | 4 ++ kernel/printk/internal.h | 7 ++++ kernel/printk/nbcon.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 31 +++++++++++---- 4 files changed, 136 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index ca1ef8700e55..20cd486b76ad 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -243,6 +243,7 @@ struct printk_buffers; * might cause a system freeze when the console * is used later. * @pbufs: Pointer to the text buffer for this context + * @seq: The sequence number to print for this context */ struct nbcon_context { /* members set by caller */ @@ -253,6 +254,7 @@ struct nbcon_context { /* members set by acquire */ struct printk_buffers *pbufs; + u64 seq; }; /** @@ -276,6 +278,7 @@ struct nbcon_context { * @node: hlist node for the console list * * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer */ struct console { @@ -299,6 +302,7 @@ struct console { /* nbcon console specific members */ atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; }; diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index f6161cd75d7d..6473f5ae4a18 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -4,6 +4,7 @@ */ #include #include +#include "printk_ringbuffer.h" #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) void __init printk_sysctl_init(void); @@ -42,6 +43,8 @@ enum printk_info_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; +extern struct printk_ringbuffer *prb; + __printf(4, 0) int vprintk_store(int facility, int level, 
const struct dev_printk_info *dev_info, @@ -69,6 +72,8 @@ void defer_console_output(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); +u64 nbcon_seq_read(struct console *con); +void nbcon_seq_force(struct console *con, u64 seq); bool nbcon_alloc(struct console *con); void nbcon_init(struct console *con); void nbcon_free(struct console *con); @@ -88,6 +93,8 @@ void nbcon_free(struct console *con); #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline u64 nbcon_seq_read(struct console *con) { return 0; } +static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_init(struct console *con) { } static inline void nbcon_free(struct console *con) { } diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 98e4be5429f0..e076096b31c0 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -140,6 +140,101 @@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); } +#ifdef CONFIG_64BIT + +#define __seq_to_nbcon_seq(seq) (seq) +#define __nbcon_seq_to_seq(seq) (seq) + +#else /* CONFIG_64BIT */ + +#define __seq_to_nbcon_seq(seq) ((u32)seq) + +static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq) +{ + u64 seq; + u64 rb_next_seq; + + /* + * The provided sequence is only the lower 32 bits of the ringbuffer + * sequence. It needs to be expanded to 64bit. Get the next sequence + * number from the ringbuffer and fold it. + * + * Having a 32bit representation in the console is sufficient. + * If a console ever gets more than 2^31 records behind + * the ringbuffer then this is the least of the problems. + * + * Also the access to the ring buffer is always safe. 
+ */ + rb_next_seq = prb_next_seq(prb); + seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq); + + return seq; +} + +#endif /* CONFIG_64BIT */ + +/** + * nbcon_seq_read - Read the current console sequence + * @con: Console to read the sequence of + * + * Return: Sequence number of the next record to print on @con. + */ +u64 nbcon_seq_read(struct console *con) +{ + unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); + + return __nbcon_seq_to_seq(nbcon_seq); +} + +/** + * nbcon_seq_force - Force console sequence to a specific value + * @con: Console to work on + * @seq: Sequence number value to set + * + * Only to be used during init (before registration) or in extreme situations + * (such as panic with CONSOLE_REPLAY_ALL). + */ +void nbcon_seq_force(struct console *con, u64 seq) +{ + /* + * If the specified record no longer exists, the oldest available record + * is chosen. This is especially important on 32bit systems because only + * the lower 32 bits of the sequence number are stored. The upper 32 bits + * are derived from the sequence numbers available in the ringbuffer. + */ + u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); + + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq)); + + /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */ + con->seq = 0; +} + +/** + * nbcon_seq_try_update - Try to update the console sequence number + * @ctxt: Pointer to an acquire context that contains + * all information about the acquire mode + * @new_seq: The new sequence number to set + * + * @ctxt->seq is updated to the new value of @con::nbcon_seq (expanded to + * the 64bit value). This could be a different value than @new_seq if + * nbcon_seq_force() was used or the current context no longer owns the + * console. In the later case, it will stop printing anyway. 
+ */ +__maybe_unused +static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) +{ + unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); + struct console *con = ctxt->console; + + if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, + __seq_to_nbcon_seq(new_seq))) { + ctxt->seq = new_seq; + } else { + ctxt->seq = nbcon_seq_read(con); + } +} + /** * nbcon_context_try_acquire_direct - Try to acquire directly * @ctxt: The context of the caller @@ -510,6 +605,9 @@ out: else ctxt->pbufs = con->pbufs; + /* Set the record sequence for this context to print. */ + ctxt->seq = nbcon_seq_read(ctxt->console); + return true; } @@ -722,6 +820,8 @@ bool nbcon_alloc(struct console *con) * * nbcon_alloc() *must* be called and succeed before this function * is called. + * + * This function expects that the legacy @con->seq has been set. */ void nbcon_init(struct console *con) { @@ -730,6 +830,7 @@ void nbcon_init(struct console *con) /* nbcon_alloc() must have been called and successful! */ BUG_ON(!con->pbufs); + nbcon_seq_force(con, con->seq); nbcon_state_set(con, &state); } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1c9720acd960..77857d2118ca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -494,7 +494,7 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, static struct printk_ringbuffer printk_rb_dynamic; -static struct printk_ringbuffer *prb = &printk_rb_static; +struct printk_ringbuffer *prb = &printk_rb_static; /* * We cannot access per-CPU data (e.g. 
per-CPU flush irq_work) before @@ -3168,6 +3168,7 @@ void console_flush_on_panic(enum con_flush_mode mode) if (mode == CONSOLE_REPLAY_ALL) { struct console *c; + short flags; int cookie; u64 seq; @@ -3175,11 +3176,17 @@ void console_flush_on_panic(enum con_flush_mode mode) cookie = console_srcu_read_lock(); for_each_console_srcu(c) { - /* - * This is an unsynchronized assignment, but the - * kernel is in "hope and pray" mode anyway. - */ - c->seq = seq; + flags = console_srcu_read_flags(c); + + if (flags & CON_NBCON) { + nbcon_seq_force(c, seq); + } else { + /* + * This is an unsynchronized assignment. On + * panic legacy consoles are only best effort. + */ + c->seq = seq; + } } console_srcu_read_unlock(cookie); } @@ -3750,6 +3757,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre struct console *c; u64 last_diff = 0; u64 printk_seq; + short flags; int cookie; u64 diff; u64 seq; @@ -3771,6 +3779,9 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre for_each_console_srcu(c) { if (con && con != c) continue; + + flags = console_srcu_read_flags(c); + /* * If consoles are not usable, it cannot be expected * that they make forward progress, so only increment @@ -3778,7 +3789,13 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre */ if (!console_is_usable(c)) continue; - printk_seq = c->seq; + + if (flags & CON_NBCON) { + printk_seq = nbcon_seq_read(c); + } else { + printk_seq = c->seq; + } + if (printk_seq < seq) diff += seq - printk_seq; } -- cgit v1.2.3 From 06653d57ff283be627a2c769139d73ecc487810f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:06 +0206 Subject: printk: nbcon: Add emit function and callback function for atomic printing Implement an emit function for nbcon consoles to output printk messages. It utilizes the lockless printk_get_next_message() and console_prepend_dropped() functions to retrieve/build the output message. 
The emit function includes the required safety points to check for handover/takeover and calls a new write_atomic callback of the console driver to output the message. It also includes proper handling for updating the nbcon console sequence number. A new nbcon_write_context struct is introduced. This is provided to the write_atomic callback and includes only the information necessary for performing atomic writes. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-8-john.ogness@linutronix.de --- include/linux/console.h | 21 ++++++++++ kernel/printk/internal.h | 6 +++ kernel/printk/nbcon.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++- kernel/printk/printk.c | 9 ++-- 4 files changed, 134 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 20cd486b76ad..14563dcb34b1 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -242,6 +242,7 @@ struct printk_buffers; * be used only with NBCON_PRIO_PANIC @prio. It * might cause a system freeze when the console * is used later. 
+ * @backlog: Ringbuffer has pending records * @pbufs: Pointer to the text buffer for this context * @seq: The sequence number to print for this context */ @@ -252,11 +253,28 @@ struct nbcon_context { enum nbcon_prio prio; unsigned int allow_unsafe_takeover : 1; + /* members set by emit */ + unsigned int backlog : 1; + /* members set by acquire */ struct printk_buffers *pbufs; u64 seq; }; +/** + * struct nbcon_write_context - Context handed to the nbcon write callbacks + * @ctxt: The core console context + * @outbuf: Pointer to the text buffer for output + * @len: Length to write + * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred + */ +struct nbcon_write_context { + struct nbcon_context __private ctxt; + char *outbuf; + unsigned int len; + bool unsafe_takeover; +}; + /** * struct console - The console descriptor structure * @name: The name of the console driver @@ -277,6 +295,7 @@ struct nbcon_context { * @data: Driver private data * @node: hlist node for the console list * + * @write_atomic: Write callback for atomic context * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer @@ -301,6 +320,8 @@ struct console { struct hlist_node node; /* nbcon console specific members */ + bool (*write_atomic)(struct console *con, + struct nbcon_write_context *wctxt); atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 6473f5ae4a18..6c2afee5ef62 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -130,3 +130,9 @@ struct printk_message { }; bool other_cpu_in_panic(void); +bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + bool is_extended, bool may_supress); + +#ifdef CONFIG_PRINTK +void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped); +#endif diff --git a/kernel/printk/nbcon.c 
b/kernel/printk/nbcon.c index e076096b31c0..6e05d263fd22 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -221,7 +221,6 @@ void nbcon_seq_force(struct console *con, u64 seq) * nbcon_seq_force() was used or the current context no longer owns the * console. In the later case, it will stop printing anyway. */ -__maybe_unused static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) { unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); @@ -755,7 +754,6 @@ static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_s * * Internal helper to avoid duplicated code. */ -__maybe_unused static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe) { struct console *con = ctxt->console; @@ -784,6 +782,110 @@ out: return nbcon_context_can_proceed(ctxt, &cur); } +/** + * nbcon_emit_next_record - Emit a record in the acquired context + * @wctxt: The write context that will be handed to the write function + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. If the caller + * wants to do more it must reacquire the console first. 
+ * + * When true is returned, @wctxt->ctxt.backlog indicates whether there are + * still records pending in the ringbuffer, + */ +__maybe_unused +static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; + struct printk_message pmsg = { + .pbufs = ctxt->pbufs, + }; + unsigned long con_dropped; + struct nbcon_state cur; + unsigned long dropped; + bool done; + + /* + * The printk buffers are filled within an unsafe section. This + * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from + * clobbering each other. + */ + + if (!nbcon_context_enter_unsafe(ctxt)) + return false; + + ctxt->backlog = printk_get_next_message(&pmsg, ctxt->seq, is_extended, true); + if (!ctxt->backlog) + return nbcon_context_exit_unsafe(ctxt); + + /* + * @con->dropped is not protected in case of an unsafe hostile + * takeover. In that situation the update can be racy so + * annotate it accordingly. + */ + con_dropped = data_race(READ_ONCE(con->dropped)); + + dropped = con_dropped + pmsg.dropped; + if (dropped && !is_extended) + console_prepend_dropped(&pmsg, dropped); + + if (!nbcon_context_exit_unsafe(ctxt)) + return false; + + /* For skipped records just update seq/dropped in @con. */ + if (pmsg.outbuf_len == 0) + goto update_con; + + /* Initialize the write context for driver callbacks. */ + wctxt->outbuf = &pmsg.pbufs->outbuf[0]; + wctxt->len = pmsg.outbuf_len; + nbcon_state_read(con, &cur); + wctxt->unsafe_takeover = cur.unsafe_takeover; + + if (con->write_atomic) { + done = con->write_atomic(con, wctxt); + } else { + nbcon_context_release(ctxt); + WARN_ON_ONCE(1); + done = false; + } + + /* If not done, the emit was aborted. */ + if (!done) + return false; + + /* + * Since any dropped message was successfully output, reset the + * dropped count for the console. 
+ */ + dropped = 0; +update_con: + /* + * The dropped count and the sequence number are updated within an + * unsafe section. This limits update races to the panic context and + * allows the panic context to win. + */ + + if (!nbcon_context_enter_unsafe(ctxt)) + return false; + + if (dropped != con_dropped) { + /* Counterpart to the READ_ONCE() above. */ + WRITE_ONCE(con->dropped, dropped); + } + + nbcon_seq_try_update(ctxt, pmsg.seq + 1); + + return nbcon_context_exit_unsafe(ctxt); +} + /** * nbcon_alloc - Allocate buffers needed by the nbcon console * @con: Console to allocate buffers for diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 77857d2118ca..778359b21761 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -698,9 +698,6 @@ out: return len; } -static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, - bool is_extended, bool may_supress); - /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { atomic64_t seq; @@ -2733,7 +2730,7 @@ static void __console_unlock(void) * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. */ #ifdef CONFIG_PRINTK -static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) +void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) { struct printk_buffers *pbufs = pmsg->pbufs; const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); @@ -2787,8 +2784,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d * of @pmsg are valid. (See the documentation of struct printk_message * for information about the @pmsg fields.) 
*/ -static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, - bool is_extended, bool may_suppress) +bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + bool is_extended, bool may_suppress) { static int panic_console_dropped; -- cgit v1.2.3 From 9757acd0a700ba4a0d16dde4ba820eb052aba1a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:07 +0206 Subject: printk: nbcon: Allow drivers to mark unsafe regions and check state For the write_atomic callback, the console driver may have unsafe regions that need to be appropriately marked. Provide functions that accept the nbcon_write_context struct to allow for the driver to enter and exit unsafe regions. Also provide a function for drivers to check if they are still the owner of the console. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-9-john.ogness@linutronix.de --- include/linux/console.h | 10 +++++++ kernel/printk/nbcon.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 14563dcb34b1..e4fc6f7c1496 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -451,6 +451,16 @@ static inline bool console_is_registered(const struct console *con) lockdep_assert_console_list_lock_held(); \ hlist_for_each_entry(con, &console_list, node) +#ifdef CONFIG_PRINTK +extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); +extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); +extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +#else +static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline bool 
nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +#endif + extern int console_set_on_cmdline; extern struct console *early_console; diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 6e05d263fd22..b96077152f49 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -732,6 +732,41 @@ static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_s return false; } +/** + * nbcon_can_proceed - Check whether ownership can proceed + * @wctxt: The write context that was handed to the write function + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. + * + * It is used in nbcon_enter_unsafe() to make sure that it still owns the + * lock. Also it is used in nbcon_exit_unsafe() to eventually free the lock + * for a higher priority context which asked for the friendly handover. + * + * It can be called inside an unsafe section when the console is just + * temporary in safe state instead of exiting and entering the unsafe state. + * + * Also it can be called in the safe context before doing an expensive safe + * operation. It does not make sense to do the operation when a higher + * priority context took the lock. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. 
+ */ +bool nbcon_can_proceed(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + struct nbcon_state cur; + + nbcon_state_read(con, &cur); + + return nbcon_context_can_proceed(ctxt, &cur); +} +EXPORT_SYMBOL_GPL(nbcon_can_proceed); + #define nbcon_context_enter_unsafe(c) __nbcon_context_update_unsafe(c, true) #define nbcon_context_exit_unsafe(c) __nbcon_context_update_unsafe(c, false) @@ -782,6 +817,46 @@ out: return nbcon_context_can_proceed(ctxt, &cur); } +/** + * nbcon_enter_unsafe - Enter an unsafe region in the driver + * @wctxt: The write context that was handed to the write function + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. + */ +bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + return nbcon_context_enter_unsafe(ctxt); +} +EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); + +/** + * nbcon_exit_unsafe - Exit an unsafe region in the driver + * @wctxt: The write context that was handed to the write function + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. 
+ */ +bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + return nbcon_context_exit_unsafe(ctxt); +} +EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); + /** * nbcon_emit_next_record - Emit a record in the acquired context * @wctxt: The write context that will be handed to the write function -- cgit v1.2.3 From 492032760127251e5540a5716a70996bacf2a3fd Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 18 Sep 2023 20:30:11 +0800 Subject: team: fix null-ptr-deref when team device type is changed A null-ptr-deref bug occurs as follows with reproducer [1]. BUG: kernel NULL pointer dereference, address: 0000000000000228 ... RIP: 0010:vlan_dev_hard_header+0x35/0x140 [8021q] ... Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x82/0x150 ? exc_page_fault+0x69/0x150 ? asm_exc_page_fault+0x26/0x30 ? vlan_dev_hard_header+0x35/0x140 [8021q] ? vlan_dev_hard_header+0x8e/0x140 [8021q] neigh_connected_output+0xb2/0x100 ip6_finish_output2+0x1cb/0x520 ? nf_hook_slow+0x43/0xc0 ? ip6_mtu+0x46/0x80 ip6_finish_output+0x2a/0xb0 mld_sendpack+0x18f/0x250 mld_ifc_work+0x39/0x160 process_one_work+0x1e6/0x3f0 worker_thread+0x4d/0x2f0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 [1] $ teamd -t team0 -d -c '{"runner": {"name": "loadbalance"}}' $ ip link add name t-dummy type dummy $ ip link add link t-dummy name t-dummy.100 type vlan id 100 $ ip link add name t-nlmon type nlmon $ ip link set t-nlmon master team0 $ ip link set t-nlmon nomaster $ ip link set t-dummy up $ ip link set team0 up $ ip link set t-dummy.100 down $ ip link set t-dummy.100 master team0 When a vlan device is enslaved to a team device and the team device type is changed from non-ether to ether, header_ops of the team device is changed to vlan_header_ops. 
That is incorrect and will trigger null-ptr-deref for vlan->real_dev in vlan_dev_hard_header() because team device is not a vlan device. Cache eth_header_ops in team_setup(), then assign cached header_ops to header_ops of team net device when its type is changed from non-ether to ether to fix the bug. Fixes: 1d76efe1577b ("team: add support for non-ethernet devices") Suggested-by: Hangbin Liu Reviewed-by: Hangbin Liu Signed-off-by: Ziyang Xuan Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230918123011.1884401-1-william.xuanziyang@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/team/team.c | 10 +++++++++- include/linux/if_team.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index e8b94580194e..508d9a392ab1 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2115,7 +2115,12 @@ static const struct ethtool_ops team_ethtool_ops = { static void team_setup_by_port(struct net_device *dev, struct net_device *port_dev) { - dev->header_ops = port_dev->header_ops; + struct team *team = netdev_priv(dev); + + if (port_dev->type == ARPHRD_ETHER) + dev->header_ops = team->header_ops_cache; + else + dev->header_ops = port_dev->header_ops; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; dev->needed_headroom = port_dev->needed_headroom; @@ -2162,8 +2167,11 @@ static int team_dev_type_check_change(struct net_device *dev, static void team_setup(struct net_device *dev) { + struct team *team = netdev_priv(dev); + ether_setup(dev); dev->max_mtu = ETH_MAX_MTU; + team->header_ops_cache = dev->header_ops; dev->netdev_ops = &team_netdev_ops; dev->ethtool_ops = &team_ethtool_ops; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 1b9b15a492fa..cdc684e04a2f 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -189,6 +189,8 @@ struct team { struct net_device *dev; /* 
associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; + const struct header_ops *header_ops_cache; + struct mutex lock; /* used for overall locking, e.g. port lists write */ /* -- cgit v1.2.3 From 6a23c555f7eb436d6799533675ffa179db3d5834 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 18 Sep 2023 14:25:36 +0100 Subject: net: phy: fix regression with AX88772A PHY driver Marek reports that a deadlock occurs with the AX88772A PHY used on the ASIX USB network driver: asix 1-1.4:1.0 (unnamed net_device) (uninitialized): PHY [usb-001:003:10] driver [Asix Electronics AX88772A] (irq=POLL) Asix Electronics AX88772A usb-001:003:10: attached PHY driver(mii_bus:phy_addr=usb-001:003:10, irq=POLL) asix 1-1.4:1.0 eth0: register 'asix' at usb-12110000.usb-1.4, ASIX AX88772 USB 2.0 Ethernet, a2:99:b6:cd:11:eb asix 1-1.4:1.0 eth0: configuring for phy/internal link mode ============================================ WARNING: possible recursive locking detected 6.6.0-rc1-00239-g8da77df649c4-dirty #13949 Not tainted -------------------------------------------- kworker/3:3/71 is trying to acquire lock: c6c704cc (&dev->lock){+.+.}-{3:3}, at: phy_start_aneg+0x1c/0x38 but task is already holding lock: c6c704cc (&dev->lock){+.+.}-{3:3}, at: phy_state_machine+0x100/0x2b8 This is because we now consistently call phy_process_state_change() while holding phydev->lock, but the AX88772A PHY driver then goes on to call phy_start_aneg() which tries to grab the same lock - causing deadlock. Fix this by exporting the unlocked version, and use this in the PHY driver instead. 
Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Fixes: ef113a60d0a9 ("net: phy: call phy_error_precise() while holding the lock") Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/E1qiEFs-007g7b-Lq@rmk-PC.armlinux.org.uk Signed-off-by: Paolo Abeni --- drivers/net/phy/ax88796b.c | 2 +- drivers/net/phy/phy.c | 3 ++- include/linux/phy.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/ax88796b.c b/drivers/net/phy/ax88796b.c index 0f1e617a26c9..eb74a8cf8df1 100644 --- a/drivers/net/phy/ax88796b.c +++ b/drivers/net/phy/ax88796b.c @@ -90,7 +90,7 @@ static void asix_ax88772a_link_change_notify(struct phy_device *phydev) */ if (phydev->state == PHY_NOLINK) { phy_init_hw(phydev); - phy_start_aneg(phydev); + _phy_start_aneg(phydev); } } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 93a8676dd8d8..a5fa077650e8 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -981,7 +981,7 @@ static int phy_check_link_status(struct phy_device *phydev) * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. 
*/ -static int _phy_start_aneg(struct phy_device *phydev) +int _phy_start_aneg(struct phy_device *phydev) { int err; @@ -1002,6 +1002,7 @@ static int _phy_start_aneg(struct phy_device *phydev) return err; } +EXPORT_SYMBOL(_phy_start_aneg); /** * phy_start_aneg - start auto-negotiation for this PHY device diff --git a/include/linux/phy.h b/include/linux/phy.h index 1351b802ffcf..3cc52826f18e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1736,6 +1736,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_config_aneg(struct phy_device *phydev); +int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); -- cgit v1.2.3 From fa17a6d8a5bd0cd7565b613cb804242cd0f6b7ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Sep 2023 14:23:21 +0000 Subject: ipv6: lockless IPV6_ADDR_PREFERENCES implementation We have data-races while reading np->srcprefs Switch the field to a plain byte, add READ_ONCE() and WRITE_ONCE() annotations where needed, and IPV6_ADDR_PREFERENCES setsockopt() can now be lockless. 
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230918142321.1794107-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/ipv6.h | 2 +- include/net/ip6_route.h | 5 ++--- include/net/ipv6.h | 20 +++++++------------- net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 19 ++++++++++--------- net/ipv6/route.c | 2 +- 6 files changed, 22 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 09253825c99c..e400ff757f13 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,7 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u8 srcprefs:3; /* 001: prefer temporary address + __u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b1ea49900b4a..28b065790261 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -53,13 +53,12 @@ struct route_info { */ static inline int rt6_srcprefs2flags(unsigned int srcprefs) { - /* No need to bitmask because srcprefs have only 3 bits. 
*/ - return srcprefs << 3; + return (srcprefs & IPV6_PREFER_SRC_MASK) << 3; } static inline unsigned int rt6_flags2srcprefs(int flags) { - return (flags >> 3) & 7; + return (flags >> 3) & IPV6_PREFER_SRC_MASK; } static inline bool rt6_need_strict(const struct in6_addr *daddr) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bd115980809f..b3444c8a6f74 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1306,10 +1306,13 @@ static inline void ip6_sock_set_recverr(struct sock *sk) inet6_set_bit(RECVERR6, sk); } -static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) +#define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \ + IPV6_PREFER_SRC_COA) + +static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) { + unsigned int prefmask = ~IPV6_PREFER_SRC_MASK; unsigned int pref = 0; - unsigned int prefmask = ~0; /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ switch (val & (IPV6_PREFER_SRC_PUBLIC | @@ -1359,20 +1362,11 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) return -EINVAL; } - inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref; + WRITE_ONCE(inet6_sk(sk)->srcprefs, + (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref); return 0; } -static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) -{ - int ret; - - lock_sock(sk); - ret = __ip6_sock_set_addr_preferences(sk, val); - release_sock(sk); - return ret; -} - static inline void ip6_sock_set_recvpktinfo(struct sock *sk) { lock_sock(sk); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7e5d9eeb990f..951ba8089b5b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1113,7 +1113,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, rcu_read_lock(); from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, + sk ? 
READ_ONCE(inet6_sk(sk)->srcprefs) : 0, &fl6->saddr); rcu_read_unlock(); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index e9dc6f881bb9..7d661735cb9d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -505,6 +505,10 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet6_assign_bit(SNDFLOW, sk, valbool); return 0; + case IPV6_ADDR_PREFERENCES: + if (optlen < sizeof(int)) + return -EINVAL; + return ip6_sock_set_addr_preferences(sk, val); } if (needs_rtnl) rtnl_lock(); @@ -964,11 +968,6 @@ done: retv = xfrm_user_policy(sk, optname, optval, optlen); break; - case IPV6_ADDR_PREFERENCES: - if (optlen < sizeof(int)) - goto e_inval; - retv = __ip6_sock_set_addr_preferences(sk, val); - break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; @@ -1415,23 +1414,25 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_ADDR_PREFERENCES: + { + u8 srcprefs = READ_ONCE(np->srcprefs); val = 0; - if (np->srcprefs & IPV6_PREFER_SRC_TMP) + if (srcprefs & IPV6_PREFER_SRC_TMP) val |= IPV6_PREFER_SRC_TMP; - else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC) + else if (srcprefs & IPV6_PREFER_SRC_PUBLIC) val |= IPV6_PREFER_SRC_PUBLIC; else { /* XXX: should we return system default? 
*/ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; } - if (np->srcprefs & IPV6_PREFER_SRC_COA) + if (srcprefs & IPV6_PREFER_SRC_COA) val |= IPV6_PREFER_SRC_COA; else val |= IPV6_PREFER_SRC_HOME; break; - + } case IPV6_MINHOPCOUNT: val = READ_ONCE(np->min_hopcount); break; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9d8dfc7423e4..b132feae3393 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2622,7 +2622,7 @@ static struct dst_entry *ip6_route_output_flags_noref(struct net *net, if (!any_src) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) - flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); + flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs)); return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); } -- cgit v1.2.3 From bafd764a8baa87e19e6beeaa58eb85fcbbdd6b20 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 18 Sep 2023 12:29:07 +0200 Subject: net: ethernet: mtk_wed: rename mtk_rxbm_desc in mtk_wed_bm_desc Rename mtk_rxbm_desc structure in mtk_wed_bm_desc since it will be used even on tx side by MT7988 SoC. 
Signed-off-by: Lorenzo Bianconi Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_wed.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt7915/mmio.c | 2 +- include/linux/soc/mediatek/mtk_wed.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 750326b298dc..f166d4f0b793 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -422,7 +422,7 @@ free_pagelist: static int mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev) { - struct mtk_rxbm_desc *desc; + struct mtk_wed_bm_desc *desc; dma_addr_t desc_phys; dev->rx_buf_ring.size = dev->wlan.rx_nbuf; @@ -442,7 +442,7 @@ mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev) static void mtk_wed_free_rx_buffer(struct mtk_wed_device *dev) { - struct mtk_rxbm_desc *desc = dev->rx_buf_ring.desc; + struct mtk_wed_bm_desc *desc = dev->rx_buf_ring.desc; if (!desc) return; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index fc7ace638ce8..e7d8e03f826f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -591,7 +591,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed) static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size) { - struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc; + struct mtk_wed_bm_desc *desc = wed->rx_buf_ring.desc; struct mt76_txwi_cache *t = NULL; struct mt7915_dev *dev; struct mt76_queue *q; diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index b2b28180dff7..c6512c216b27 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -45,7 +45,7 @@ enum mtk_wed_wo_cmd { MTK_WED_WO_CMD_WED_END }; -struct mtk_rxbm_desc { +struct mtk_wed_bm_desc { __le32 buf0; __le32 token; } 
__packed __aligned(4); @@ -104,7 +104,7 @@ struct mtk_wed_device { struct { int size; - struct mtk_rxbm_desc *desc; + struct mtk_wed_bm_desc *desc; dma_addr_t desc_phys; } rx_buf_ring; -- cgit v1.2.3 From ff0ea57fa30e860d3373acd1383e9d9599144b58 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 18 Sep 2023 12:29:08 +0200 Subject: net: ethernet: mtk_wed: introduce mtk_wed_buf structure Introduce mtk_wed_buf structure to store both virtual and physical addresses allocated in mtk_wed_tx_buffer_alloc() routine. This is a preliminary patch to add WED support for MT7988 SoC since it relies on a different dma descriptor layout not storing page dma addresses. Co-developed-by: Sujuan Chen Signed-off-by: Sujuan Chen Signed-off-by: Lorenzo Bianconi Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_wed.c | 12 ++++++------ include/linux/soc/mediatek/mtk_wed.h | 7 ++++++- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index f166d4f0b793..592e497984e3 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -300,9 +300,9 @@ out: static int mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) { + struct mtk_wed_buf *page_list; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; - void **page_list; int token = dev->wlan.token_start; int ring_size; int n_pages; @@ -343,7 +343,8 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) return -ENOMEM; } - page_list[page_idx++] = page; + page_list[page_idx].p = page; + page_list[page_idx++].phy_addr = page_phys; dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -387,8 +388,8 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) static void mtk_wed_free_tx_buffer(struct mtk_wed_device *dev) { + struct mtk_wed_buf *page_list = dev->tx_buf_ring.pages; struct mtk_wdma_desc *desc = dev->tx_buf_ring.desc; - void 
**page_list = dev->tx_buf_ring.pages; int page_idx; int i; @@ -400,13 +401,12 @@ mtk_wed_free_tx_buffer(struct mtk_wed_device *dev) for (i = 0, page_idx = 0; i < dev->tx_buf_ring.size; i += MTK_WED_BUF_PER_PAGE) { - void *page = page_list[page_idx++]; - dma_addr_t buf_addr; + dma_addr_t buf_addr = page_list[page_idx].phy_addr; + void *page = page_list[page_idx++].p; if (!page) break; - buf_addr = le32_to_cpu(desc[i].buf0); dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); __free_page(page); diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index c6512c216b27..5f00dc26582b 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -76,6 +76,11 @@ struct mtk_wed_wo_rx_stats { __le32 rx_drop_cnt; }; +struct mtk_wed_buf { + void *p; + dma_addr_t phy_addr; +}; + struct mtk_wed_device { #ifdef CONFIG_NET_MEDIATEK_SOC_WED const struct mtk_wed_ops *ops; @@ -97,7 +102,7 @@ struct mtk_wed_device { struct { int size; - void **pages; + struct mtk_wed_buf *pages; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; } tx_buf_ring; -- cgit v1.2.3 From e2f64db13aa1d08e32621067e4fe16bbc114b375 Mon Sep 17 00:00:00 2001 From: Sujuan Chen Date: Mon, 18 Sep 2023 12:29:13 +0200 Subject: net: ethernet: mtk_wed: introduce WED support for MT7988 Similar to MT7986 and MT7622, enable Wireless Ethernet Dispatcher for MT7988 in order to offload traffic forwarded from LAN/WLAN to WLAN/LAN Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Sujuan Chen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 3 + drivers/net/ethernet/mediatek/mtk_wed.c | 442 +++++++++++++++++------- drivers/net/ethernet/mediatek/mtk_wed.h | 29 ++ drivers/net/ethernet/mediatek/mtk_wed_mcu.c | 33 +- drivers/net/ethernet/mediatek/mtk_wed_regs.h | 223 +++++++++++- 
drivers/net/ethernet/mediatek/mtk_wed_wo.h | 2 + include/linux/soc/mediatek/mtk_wed.h | 8 +- 9 files changed, 603 insertions(+), 140 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3cffd1bd3067..697620c6354b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -197,6 +197,7 @@ static const struct mtk_reg_map mt7988_reg_map = { .wdma_base = { [0] = 0x4800, [1] = 0x4c00, + [2] = 0x5000, }, .pse_iq_sta = 0x0180, .pse_oq_sta = 0x01a0, diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 403219d987ef..9ae3b8a71d0e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -1132,7 +1132,7 @@ struct mtk_reg_map { u32 gdm1_cnt; u32 gdma_to_ppe; u32 ppe_base; - u32 wdma_base[2]; + u32 wdma_base[3]; u32 pse_iq_sta; u32 pse_oq_sta; }; diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index ef3980840695..95f76975f258 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -201,6 +201,9 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, case 1: pse_port = PSE_WDMA1_PORT; break; + case 2: + pse_port = PSE_WDMA2_PORT; + break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index c2ff2d6405f6..b6ca12686beb 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -17,17 +17,19 @@ #include #include #include "mtk_eth_soc.h" -#include "mtk_wed_regs.h" #include "mtk_wed.h" #include "mtk_ppe.h" #include "mtk_wed_wo.h" #define MTK_PCIE_BASE(n) (0x1a143000 + (n) * 0x2000) -#define MTK_WED_PKT_SIZE 1900 +#define MTK_WED_PKT_SIZE 1920 #define MTK_WED_BUF_SIZE 2048 
+#define MTK_WED_PAGE_BUF_SIZE 128 #define MTK_WED_BUF_PER_PAGE (PAGE_SIZE / 2048) +#define MTK_WED_RX_PAGE_BUF_PER_PAGE (PAGE_SIZE / 128) #define MTK_WED_RX_RING_SIZE 1536 +#define MTK_WED_RX_PG_BM_CNT 8192 #define MTK_WED_TX_RING_SIZE 2048 #define MTK_WED_WDMA_RING_SIZE 1024 @@ -41,7 +43,10 @@ #define MTK_WED_RRO_QUE_CNT 8192 #define MTK_WED_MIOD_ENTRY_CNT 128 -static struct mtk_wed_hw *hw_list[2]; +#define MTK_WED_TX_BM_DMA_SIZE 65536 +#define MTK_WED_TX_BM_PKT_CNT 32768 + +static struct mtk_wed_hw *hw_list[3]; static DEFINE_MUTEX(hw_lock); struct mtk_wed_flow_block_priv { @@ -56,6 +61,7 @@ static const struct mtk_wed_soc_data mt7622_data = { .reset_idx_tx_mask = GENMASK(3, 0), .reset_idx_rx_mask = GENMASK(17, 16), }, + .tx_ring_desc_size = sizeof(struct mtk_wdma_desc), .wdma_desc_size = sizeof(struct mtk_wdma_desc), }; @@ -66,6 +72,18 @@ static const struct mtk_wed_soc_data mt7986_data = { .reset_idx_tx_mask = GENMASK(1, 0), .reset_idx_rx_mask = GENMASK(7, 6), }, + .tx_ring_desc_size = sizeof(struct mtk_wdma_desc), + .wdma_desc_size = 2 * sizeof(struct mtk_wdma_desc), +}; + +static const struct mtk_wed_soc_data mt7988_data = { + .regmap = { + .tx_bm_tkid = 0x0c8, + .wpdma_rx_ring0 = 0x7d0, + .reset_idx_tx_mask = GENMASK(1, 0), + .reset_idx_rx_mask = GENMASK(7, 6), + }, + .tx_ring_desc_size = sizeof(struct mtk_wed_bm_desc), .wdma_desc_size = 2 * sizeof(struct mtk_wdma_desc), }; @@ -320,33 +338,38 @@ out: static int mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) { + u32 desc_size = dev->hw->soc->tx_ring_desc_size; + int i, page_idx = 0, n_pages, ring_size; + int token = dev->wlan.token_start; struct mtk_wed_buf *page_list; - struct mtk_wdma_desc *desc; dma_addr_t desc_phys; - int token = dev->wlan.token_start; - int ring_size; - int n_pages; - int i, page_idx; + void *desc_ptr; - ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1); - n_pages = ring_size / MTK_WED_BUF_PER_PAGE; + if (!mtk_wed_is_v3_or_greater(dev->hw)) { + ring_size = dev->wlan.nbuf & 
~(MTK_WED_BUF_PER_PAGE - 1); + dev->tx_buf_ring.size = ring_size; + } else { + dev->tx_buf_ring.size = MTK_WED_TX_BM_DMA_SIZE; + ring_size = MTK_WED_TX_BM_PKT_CNT; + } + n_pages = dev->tx_buf_ring.size / MTK_WED_BUF_PER_PAGE; page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL); if (!page_list) return -ENOMEM; - dev->tx_buf_ring.size = ring_size; dev->tx_buf_ring.pages = page_list; - desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc), - &desc_phys, GFP_KERNEL); - if (!desc) + desc_ptr = dma_alloc_coherent(dev->hw->dev, + dev->tx_buf_ring.size * desc_size, + &desc_phys, GFP_KERNEL); + if (!desc_ptr) return -ENOMEM; - dev->tx_buf_ring.desc = desc; + dev->tx_buf_ring.desc = desc_ptr; dev->tx_buf_ring.desc_phys = desc_phys; - for (i = 0, page_idx = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) { + for (i = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) { dma_addr_t page_phys, buf_phys; struct page *page; void *buf; @@ -372,28 +395,31 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) buf_phys = page_phys; for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) { - u32 txd_size; - u32 ctrl; - - txd_size = dev->wlan.init_buf(buf, buf_phys, token++); + struct mtk_wdma_desc *desc = desc_ptr; desc->buf0 = cpu_to_le32(buf_phys); - desc->buf1 = cpu_to_le32(buf_phys + txd_size); - - if (mtk_wed_is_v1(dev->hw)) - ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) | - FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1, - MTK_WED_BUF_SIZE - txd_size) | - MTK_WDMA_DESC_CTRL_LAST_SEG1; - else - ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) | - FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2, - MTK_WED_BUF_SIZE - txd_size) | - MTK_WDMA_DESC_CTRL_LAST_SEG0; - desc->ctrl = cpu_to_le32(ctrl); - desc->info = 0; - desc++; - + if (!mtk_wed_is_v3_or_greater(dev->hw)) { + u32 txd_size, ctrl; + + txd_size = dev->wlan.init_buf(buf, buf_phys, + token++); + desc->buf1 = cpu_to_le32(buf_phys + txd_size); + ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size); + if (mtk_wed_is_v1(dev->hw)) + ctrl |= 
MTK_WDMA_DESC_CTRL_LAST_SEG1 | + FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1, + MTK_WED_BUF_SIZE - txd_size); + else + ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG0 | + FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2, + MTK_WED_BUF_SIZE - txd_size); + desc->ctrl = cpu_to_le32(ctrl); + desc->info = 0; + } else { + desc->ctrl = cpu_to_le32(token << 16); + } + + desc_ptr += desc_size; buf += MTK_WED_BUF_SIZE; buf_phys += MTK_WED_BUF_SIZE; } @@ -409,31 +435,31 @@ static void mtk_wed_free_tx_buffer(struct mtk_wed_device *dev) { struct mtk_wed_buf *page_list = dev->tx_buf_ring.pages; - struct mtk_wdma_desc *desc = dev->tx_buf_ring.desc; - int page_idx; - int i; + struct mtk_wed_hw *hw = dev->hw; + int i, page_idx = 0; if (!page_list) return; - if (!desc) + if (!dev->tx_buf_ring.desc) goto free_pagelist; - for (i = 0, page_idx = 0; i < dev->tx_buf_ring.size; - i += MTK_WED_BUF_PER_PAGE) { - dma_addr_t buf_addr = page_list[page_idx].phy_addr; + for (i = 0; i < dev->tx_buf_ring.size; i += MTK_WED_BUF_PER_PAGE) { + dma_addr_t page_phy = page_list[page_idx].phy_addr; void *page = page_list[page_idx++].p; if (!page) break; - dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE, + dma_unmap_page(dev->hw->dev, page_phy, PAGE_SIZE, DMA_BIDIRECTIONAL); __free_page(page); } - dma_free_coherent(dev->hw->dev, dev->tx_buf_ring.size * sizeof(*desc), - desc, dev->tx_buf_ring.desc_phys); + dma_free_coherent(dev->hw->dev, + dev->tx_buf_ring.size * hw->soc->tx_ring_desc_size, + dev->tx_buf_ring.desc, + dev->tx_buf_ring.desc_phys); free_pagelist: kfree(page_list); @@ -518,13 +544,23 @@ mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en) { u32 mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK; - if (mtk_wed_is_v1(dev->hw)) + switch (dev->hw->version) { + case 1: mask |= MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR; - else + break; + case 2: mask |= MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH | MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH | MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR; + break; + case 3: + 
mask = MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | + MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; + break; + default: + break; + } if (!dev->hw->num_flows) mask &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; @@ -536,6 +572,9 @@ mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en) static void mtk_wed_set_512_support(struct mtk_wed_device *dev, bool enable) { + if (!mtk_wed_is_v2(dev->hw)) + return; + if (enable) { wed_w32(dev, MTK_WED_TXDP_CTRL, MTK_WED_TXDP_DW9_OVERWR); wed_w32(dev, MTK_WED_TXP_DW1, @@ -610,6 +649,14 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev) MTK_WED_WPDMA_RX_D_RX_DRV_EN); wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK); + + if (mtk_wed_is_v3_or_greater(dev->hw) && + mtk_wed_get_rx_capa(dev)) { + wdma_clr(dev, MTK_WDMA_PREF_TX_CFG, + MTK_WDMA_PREF_TX_CFG_PREF_EN); + wdma_clr(dev, MTK_WDMA_PREF_RX_CFG, + MTK_WDMA_PREF_RX_CFG_PREF_EN); + } } mtk_wed_set_512_support(dev, false); @@ -652,6 +699,14 @@ mtk_wed_deinit(struct mtk_wed_device *dev) MTK_WED_CTRL_RX_ROUTE_QM_EN | MTK_WED_CTRL_WED_RX_BM_EN | MTK_WED_CTRL_RX_RRO_QM_EN); + + if (mtk_wed_is_v3_or_greater(dev->hw)) { + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN); + wed_clr(dev, MTK_WED_RESET, MTK_WED_RESET_TX_AMSDU); + wed_clr(dev, MTK_WED_PCIE_INT_CTRL, + MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA | + MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER); + } } static void @@ -701,21 +756,37 @@ mtk_wed_detach(struct mtk_wed_device *dev) mutex_unlock(&hw_lock); } -#define PCIE_BASE_ADDR0 0x11280000 static void mtk_wed_bus_init(struct mtk_wed_device *dev) { switch (dev->wlan.bus_type) { case MTK_WED_BUS_PCIE: { struct device_node *np = dev->hw->eth->dev->of_node; - struct regmap *regs; - regs = syscon_regmap_lookup_by_phandle(np, - "mediatek,wed-pcie"); - if (IS_ERR(regs)) - break; + if (mtk_wed_is_v2(dev->hw)) { + struct regmap *regs; + + regs = syscon_regmap_lookup_by_phandle(np, + "mediatek,wed-pcie"); + if (IS_ERR(regs)) + break; - regmap_update_bits(regs, 0, BIT(0), BIT(0)); + 
regmap_update_bits(regs, 0, BIT(0), BIT(0)); + } + + if (dev->wlan.msi) { + wed_w32(dev, MTK_WED_PCIE_CFG_INTM, + dev->hw->pcie_base | 0xc08); + wed_w32(dev, MTK_WED_PCIE_CFG_BASE, + dev->hw->pcie_base | 0xc04); + wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(8)); + } else { + wed_w32(dev, MTK_WED_PCIE_CFG_INTM, + dev->hw->pcie_base | 0x180); + wed_w32(dev, MTK_WED_PCIE_CFG_BASE, + dev->hw->pcie_base | 0x184); + wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24)); + } wed_w32(dev, MTK_WED_PCIE_INT_CTRL, FIELD_PREP(MTK_WED_PCIE_INT_CTRL_POLL_EN, 2)); @@ -723,19 +794,9 @@ mtk_wed_bus_init(struct mtk_wed_device *dev) /* pcie interrupt control: pola/source selection */ wed_set(dev, MTK_WED_PCIE_INT_CTRL, MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA | - FIELD_PREP(MTK_WED_PCIE_INT_CTRL_SRC_SEL, 1)); - wed_r32(dev, MTK_WED_PCIE_INT_CTRL); - - wed_w32(dev, MTK_WED_PCIE_CFG_INTM, PCIE_BASE_ADDR0 | 0x180); - wed_w32(dev, MTK_WED_PCIE_CFG_BASE, PCIE_BASE_ADDR0 | 0x184); - - /* pcie interrupt status trigger register */ - wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24)); - wed_r32(dev, MTK_WED_PCIE_INT_TRIGGER); - - /* pola setting */ - wed_set(dev, MTK_WED_PCIE_INT_CTRL, - MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA); + MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER | + FIELD_PREP(MTK_WED_PCIE_INT_CTRL_SRC_SEL, + dev->hw->index)); break; } case MTK_WED_BUS_AXI: @@ -773,18 +834,19 @@ mtk_wed_set_wpdma(struct mtk_wed_device *dev) static void mtk_wed_hw_init_early(struct mtk_wed_device *dev) { - u32 mask, set; + u32 set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2); + u32 mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE; mtk_wed_deinit(dev); mtk_wed_reset(dev, MTK_WED_RESET_WED); mtk_wed_set_wpdma(dev); - mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE | - MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE | - MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE; - set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2) | - MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP | - MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY; + if (!mtk_wed_is_v3_or_greater(dev->hw)) { + mask |= 
MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE | + MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE; + set |= MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP | + MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY; + } wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set); if (mtk_wed_is_v1(dev->hw)) { @@ -932,11 +994,18 @@ mtk_wed_route_qm_hw_init(struct mtk_wed_device *dev) } /* configure RX_ROUTE_QM */ - wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST); - wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_TXDMAD_FPORT); - wed_set(dev, MTK_WED_RTQM_GLO_CFG, - FIELD_PREP(MTK_WED_RTQM_TXDMAD_FPORT, 0x3 + dev->hw->index)); - wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST); + if (mtk_wed_is_v2(dev->hw)) { + wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST); + wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_TXDMAD_FPORT); + wed_set(dev, MTK_WED_RTQM_GLO_CFG, + FIELD_PREP(MTK_WED_RTQM_TXDMAD_FPORT, + 0x3 + dev->hw->index)); + wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST); + } else { + wed_set(dev, MTK_WED_RTQM_ENQ_CFG0, + FIELD_PREP(MTK_WED_RTQM_ENQ_CFG_TXDMAD_FPORT, + 0x3 + dev->hw->index)); + } /* enable RX_ROUTE_QM */ wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN); } @@ -949,22 +1018,30 @@ mtk_wed_hw_init(struct mtk_wed_device *dev) dev->init_done = true; mtk_wed_set_ext_int(dev, false); - wed_w32(dev, MTK_WED_TX_BM_CTRL, - MTK_WED_TX_BM_CTRL_PAUSE | - FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM, - dev->tx_buf_ring.size / 128) | - FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM, - MTK_WED_TX_RING_SIZE / 256)); wed_w32(dev, MTK_WED_TX_BM_BASE, dev->tx_buf_ring.desc_phys); - wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE); if (mtk_wed_is_v1(dev->hw)) { + wed_w32(dev, MTK_WED_TX_BM_CTRL, + MTK_WED_TX_BM_CTRL_PAUSE | + FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM, + dev->tx_buf_ring.size / 128) | + FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM, + MTK_WED_TX_RING_SIZE / 256)); wed_w32(dev, MTK_WED_TX_BM_DYN_THR, FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) | MTK_WED_TX_BM_DYN_THR_HI); - } 
else { + } else if (mtk_wed_is_v2(dev->hw)) { + wed_w32(dev, MTK_WED_TX_BM_CTRL, + MTK_WED_TX_BM_CTRL_PAUSE | + FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM, + dev->tx_buf_ring.size / 128) | + FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM, + MTK_WED_TX_RING_SIZE / 256)); + wed_w32(dev, MTK_WED_TX_TKID_DYN_THR, + FIELD_PREP(MTK_WED_TX_TKID_DYN_THR_LO, 0) | + MTK_WED_TX_TKID_DYN_THR_HI); wed_w32(dev, MTK_WED_TX_BM_DYN_THR, FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO_V2, 0) | MTK_WED_TX_BM_DYN_THR_HI_V2); @@ -974,9 +1051,6 @@ mtk_wed_hw_init(struct mtk_wed_device *dev) dev->tx_buf_ring.size / 128) | FIELD_PREP(MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM, dev->tx_buf_ring.size / 128)); - wed_w32(dev, MTK_WED_TX_TKID_DYN_THR, - FIELD_PREP(MTK_WED_TX_TKID_DYN_THR_LO, 0) | - MTK_WED_TX_TKID_DYN_THR_HI); } wed_w32(dev, dev->hw->soc->regmap.tx_bm_tkid, @@ -986,26 +1060,62 @@ mtk_wed_hw_init(struct mtk_wed_device *dev) mtk_wed_reset(dev, MTK_WED_RESET_TX_BM); + if (mtk_wed_is_v3_or_greater(dev->hw)) { + /* switch to new bm architecture */ + wed_clr(dev, MTK_WED_TX_BM_CTRL, + MTK_WED_TX_BM_CTRL_LEGACY_EN); + + wed_w32(dev, MTK_WED_TX_TKID_CTRL, + MTK_WED_TX_TKID_CTRL_PAUSE | + FIELD_PREP(MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM_V3, + dev->wlan.nbuf / 128) | + FIELD_PREP(MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM_V3, + dev->wlan.nbuf / 128)); + /* return SKBID + SDP back to bm */ + wed_set(dev, MTK_WED_TX_TKID_CTRL, + MTK_WED_TX_TKID_CTRL_FREE_FORMAT); + + wed_w32(dev, MTK_WED_TX_BM_INIT_PTR, + MTK_WED_TX_BM_PKT_CNT | + MTK_WED_TX_BM_INIT_SW_TAIL_IDX); + } + if (mtk_wed_is_v1(dev->hw)) { wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_TX_BM_EN | MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); - } else { - wed_clr(dev, MTK_WED_TX_TKID_CTRL, MTK_WED_TX_TKID_CTRL_PAUSE); - if (mtk_wed_get_rx_capa(dev)) { - /* rx hw init */ - wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, - MTK_WED_WPDMA_RX_D_RST_CRX_IDX | - MTK_WED_WPDMA_RX_D_RST_DRV_IDX); - wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, 0); - - mtk_wed_rx_buffer_hw_init(dev); - 
mtk_wed_rro_hw_init(dev); - mtk_wed_route_qm_hw_init(dev); - } + } else if (mtk_wed_get_rx_capa(dev)) { + /* rx hw init */ + wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, + MTK_WED_WPDMA_RX_D_RST_CRX_IDX | + MTK_WED_WPDMA_RX_D_RST_DRV_IDX); + wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, 0); + + /* reset prefetch index of ring */ + wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX, + MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR); + wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX, + MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR); + + wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX, + MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR); + wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX, + MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR); + + /* reset prefetch FIFO of ring */ + wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG, + MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R0_CLR | + MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R1_CLR); + wed_w32(dev, MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG, 0); + + mtk_wed_rx_buffer_hw_init(dev); + mtk_wed_rro_hw_init(dev); + mtk_wed_route_qm_hw_init(dev); } wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE); + if (!mtk_wed_is_v1(dev->hw)) + wed_clr(dev, MTK_WED_TX_TKID_CTRL, MTK_WED_TX_TKID_CTRL_PAUSE); } static void @@ -1303,6 +1413,24 @@ mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size, dev->hw->soc->wdma_desc_size, true)) return -ENOMEM; + if (mtk_wed_is_v3_or_greater(dev->hw)) { + struct mtk_wdma_desc *desc = wdma->desc; + int i; + + for (i = 0; i < MTK_WED_WDMA_RING_SIZE; i++) { + desc->buf0 = 0; + desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE); + desc->buf1 = 0; + desc->info = cpu_to_le32(MTK_WDMA_TXD0_DESC_INFO_DMA_DONE); + desc++; + desc->buf0 = 0; + desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE); + desc->buf1 = 0; + desc->info = cpu_to_le32(MTK_WDMA_TXD1_DESC_INFO_DMA_DONE); + desc++; + } + } + wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE, wdma->desc_phys); wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT, @@ -1368,6 +1496,9 @@ 
mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask) wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask); } else { + if (mtk_wed_is_v3_or_greater(dev->hw)) + wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_TKID_ALI_EN); + /* initail tx interrupt trigger */ wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_TX, MTK_WED_WPDMA_INT_CTRL_TX0_DONE_EN | @@ -1420,33 +1551,60 @@ mtk_wed_dma_enable(struct mtk_wed_device *dev) { int i; - wed_set(dev, MTK_WED_WPDMA_INT_CTRL, MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV); + if (!mtk_wed_is_v3_or_greater(dev->hw)) { + wed_set(dev, MTK_WED_WPDMA_INT_CTRL, + MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV); + wed_set(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN); + wdma_set(dev, MTK_WDMA_GLO_CFG, + MTK_WDMA_GLO_CFG_TX_DMA_EN | + MTK_WDMA_GLO_CFG_RX_INFO1_PRERES | + MTK_WDMA_GLO_CFG_RX_INFO2_PRERES); + wed_set(dev, MTK_WED_WPDMA_CTRL, MTK_WED_WPDMA_CTRL_SDL1_FIXED); + } else { + wed_set(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DDONE2_WR); + wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_TX_DMA_EN); + } wed_set(dev, MTK_WED_GLO_CFG, MTK_WED_GLO_CFG_TX_DMA_EN | MTK_WED_GLO_CFG_RX_DMA_EN); - wed_set(dev, MTK_WED_WPDMA_GLO_CFG, - MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | - MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN); + wed_set(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_RX_DRV_EN); - wdma_set(dev, MTK_WDMA_GLO_CFG, - MTK_WDMA_GLO_CFG_TX_DMA_EN | - MTK_WDMA_GLO_CFG_RX_INFO1_PRERES | - MTK_WDMA_GLO_CFG_RX_INFO2_PRERES); - if (mtk_wed_is_v1(dev->hw)) { wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_INFO3_PRERES); return; } - wed_set(dev, MTK_WED_WPDMA_CTRL, - MTK_WED_WPDMA_CTRL_SDL1_FIXED); wed_set(dev, MTK_WED_WPDMA_GLO_CFG, MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC | MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC); + + if (mtk_wed_is_v3_or_greater(dev->hw)) { + wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG, + FIELD_PREP(MTK_WED_WDMA_RX_PREF_BURST_SIZE, 0x10) | + 
FIELD_PREP(MTK_WED_WDMA_RX_PREF_LOW_THRES, 0x8)); + wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_DDONE2_EN); + wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG, MTK_WED_WDMA_RX_PREF_EN); + + wed_clr(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK_LAST); + wed_set(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_CHK | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNS_VER_FORCE_4); + + wdma_set(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN); + } + wed_clr(dev, MTK_WED_WPDMA_GLO_CFG, MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP | MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV); @@ -1458,11 +1616,22 @@ mtk_wed_dma_enable(struct mtk_wed_device *dev) MTK_WED_WDMA_GLO_CFG_TX_DRV_EN | MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK); + wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RXD_READ_LEN); wed_set(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RX_DRV_EN | FIELD_PREP(MTK_WED_WPDMA_RX_D_RXD_READ_LEN, 0x18) | - FIELD_PREP(MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL, - 0x2)); + FIELD_PREP(MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL, 0x2)); + + if (mtk_wed_is_v3_or_greater(dev->hw)) { + wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_CFG, + MTK_WED_WPDMA_RX_D_PREF_EN | + FIELD_PREP(MTK_WED_WPDMA_RX_D_PREF_BURST_SIZE, 0x10) | + FIELD_PREP(MTK_WED_WPDMA_RX_D_PREF_LOW_THRES, 0x8)); + + wed_set(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_RX_D_DRV_EN); + wdma_set(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN); + wdma_set(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN); + } for (i = 0; i < MTK_WED_RX_QUEUES; i++) mtk_wed_check_wfdma_rx_fill(dev, i); @@ -1502,6 +1671,12 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) wed_r32(dev, MTK_WED_EXT_INT_MASK1); wed_r32(dev, MTK_WED_EXT_INT_MASK2); + if (mtk_wed_is_v3_or_greater(dev->hw)) { + wed_w32(dev, MTK_WED_EXT_INT_MASK3, + MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY); + wed_r32(dev, MTK_WED_EXT_INT_MASK3); + } + if (mtk_wed_rro_cfg(dev)) return; } @@ -1553,6 
+1728,7 @@ mtk_wed_attach(struct mtk_wed_device *dev) dev->irq = hw->irq; dev->wdma_idx = hw->index; dev->version = hw->version; + dev->hw->pcie_base = mtk_wed_get_pcie_base(dev); if (hw->eth->dma_dev == hw->eth->dev && of_dma_is_coherent(hw->eth->dev->of_node)) @@ -1620,6 +1796,23 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs, ring->reg_base = MTK_WED_RING_TX(idx); ring->wpdma = regs; + if (mtk_wed_is_v3_or_greater(dev->hw) && idx == 1) { + /* reset prefetch index */ + wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR | + MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR); + + wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR | + MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR); + + /* reset prefetch FIFO */ + wed_w32(dev, MTK_WED_WDMA_RX_PREF_FIFO_CFG, + MTK_WED_WDMA_RX_PREF_FIFO_RX0_CLR | + MTK_WED_WDMA_RX_PREF_FIFO_RX1_CLR); + wed_w32(dev, MTK_WED_WDMA_RX_PREF_FIFO_CFG, 0); + } + /* WED -> WPDMA */ wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys); wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_TX_RING_SIZE); @@ -1694,15 +1887,13 @@ mtk_wed_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs, static u32 mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask) { - u32 val, ext_mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK; + u32 val, ext_mask; - if (mtk_wed_is_v1(dev->hw)) - ext_mask |= MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR; + if (mtk_wed_is_v3_or_greater(dev->hw)) + ext_mask = MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | + MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; else - ext_mask |= MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH | - MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH | - MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | - MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR; + ext_mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK; val = wed_r32(dev, MTK_WED_EXT_INT_STATUS); wed_w32(dev, MTK_WED_EXT_INT_STATUS, val); @@ -1943,6 +2134,9 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, case 2: hw->soc = 
&mt7986_data; break; + case 3: + hw->soc = &mt7988_data; + break; default: case 1: hw->mirror = syscon_regmap_lookup_by_phandle(eth_np, diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h index afaf5a46fbb3..27d336db4d4d 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.h +++ b/drivers/net/ethernet/mediatek/mtk_wed.h @@ -9,6 +9,8 @@ #include #include +#include "mtk_wed_regs.h" + struct mtk_eth; struct mtk_wed_wo; @@ -19,6 +21,7 @@ struct mtk_wed_soc_data { u32 reset_idx_tx_mask; u32 reset_idx_rx_mask; } regmap; + u32 tx_ring_desc_size; u32 wdma_desc_size; }; @@ -35,6 +38,7 @@ struct mtk_wed_hw { struct dentry *debugfs_dir; struct mtk_wed_device *wed_dev; struct mtk_wed_wo *wed_wo; + u32 pcie_base; u32 debugfs_reg; u32 num_flows; u8 version; @@ -61,6 +65,16 @@ static inline bool mtk_wed_is_v2(struct mtk_wed_hw *hw) return hw->version == 2; } +static inline bool mtk_wed_is_v3(struct mtk_wed_hw *hw) +{ + return hw->version == 3; +} + +static inline bool mtk_wed_is_v3_or_greater(struct mtk_wed_hw *hw) +{ + return hw->version > 2; +} + static inline void wed_w32(struct mtk_wed_device *dev, u32 reg, u32 val) { @@ -143,6 +157,21 @@ wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val) writel(val, dev->txfree_ring.wpdma + reg); } +static inline u32 mtk_wed_get_pcie_base(struct mtk_wed_device *dev) +{ + if (!mtk_wed_is_v3_or_greater(dev->hw)) + return MTK_WED_PCIE_BASE; + + switch (dev->hw->index) { + case 1: + return MTK_WED_PCIE_BASE1; + case 2: + return MTK_WED_PCIE_BASE2; + default: + return MTK_WED_PCIE_BASE0; + } +} + void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, void __iomem *wdma, phys_addr_t wdma_phy, int index); diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c index e53531252bd9..65a78e274009 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c @@ -331,10 +331,22 @@ 
mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo) wo->hw->index + 1); /* load firmware */ - if (of_device_is_compatible(wo->hw->node, "mediatek,mt7981-wed")) - fw_name = MT7981_FIRMWARE_WO; - else - fw_name = wo->hw->index ? MT7986_FIRMWARE_WO1 : MT7986_FIRMWARE_WO0; + switch (wo->hw->version) { + case 2: + if (of_device_is_compatible(wo->hw->node, + "mediatek,mt7981-wed")) + fw_name = MT7981_FIRMWARE_WO; + else + fw_name = wo->hw->index ? MT7986_FIRMWARE_WO1 + : MT7986_FIRMWARE_WO0; + break; + case 3: + fw_name = wo->hw->index ? MT7988_FIRMWARE_WO1 + : MT7988_FIRMWARE_WO0; + break; + default: + return -EINVAL; + } ret = request_firmware(&fw, fw_name, wo->hw->dev); if (ret) @@ -355,15 +367,16 @@ mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo) } /* set the start address */ - boot_cr = wo->hw->index ? MTK_WO_MCU_CFG_LS_WA_BOOT_ADDR_ADDR - : MTK_WO_MCU_CFG_LS_WM_BOOT_ADDR_ADDR; + if (!mtk_wed_is_v3_or_greater(wo->hw) && wo->hw->index) + boot_cr = MTK_WO_MCU_CFG_LS_WA_BOOT_ADDR_ADDR; + else + boot_cr = MTK_WO_MCU_CFG_LS_WM_BOOT_ADDR_ADDR; wo_w32(wo, boot_cr, mem_region[MTK_WED_WO_REGION_EMI].phy_addr >> 16); /* wo firmware reset */ wo_w32(wo, MTK_WO_MCU_CFG_LS_WF_MCCR_CLR_ADDR, 0xc00); - val = wo_r32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR); - val |= wo->hw->index ? 
MTK_WO_MCU_CFG_LS_WF_WM_WA_WA_CPU_RSTB_MASK - : MTK_WO_MCU_CFG_LS_WF_WM_WA_WM_CPU_RSTB_MASK; + val = wo_r32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR) | + MTK_WO_MCU_CFG_LS_WF_WM_WA_WM_CPU_RSTB_MASK; wo_w32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR, val); out: release_firmware(fw); @@ -398,3 +411,5 @@ int mtk_wed_mcu_init(struct mtk_wed_wo *wo) MODULE_FIRMWARE(MT7981_FIRMWARE_WO); MODULE_FIRMWARE(MT7986_FIRMWARE_WO0); MODULE_FIRMWARE(MT7986_FIRMWARE_WO1); +MODULE_FIRMWARE(MT7988_FIRMWARE_WO0); +MODULE_FIRMWARE(MT7988_FIRMWARE_WO1); diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h index 2253f4eb5bc1..a4d3cf64d090 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_regs.h +++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h @@ -13,6 +13,9 @@ #define MTK_WDMA_DESC_CTRL_LAST_SEG0 BIT(30) #define MTK_WDMA_DESC_CTRL_DMA_DONE BIT(31) +#define MTK_WDMA_TXD0_DESC_INFO_DMA_DONE BIT(29) +#define MTK_WDMA_TXD1_DESC_INFO_DMA_DONE BIT(31) + struct mtk_wdma_desc { __le32 buf0; __le32 ctrl; @@ -37,6 +40,7 @@ struct mtk_wdma_desc { #define MTK_WED_RESET_WDMA_INT_AGENT BIT(19) #define MTK_WED_RESET_RX_RRO_QM BIT(20) #define MTK_WED_RESET_RX_ROUTE_QM BIT(21) +#define MTK_WED_RESET_TX_AMSDU BIT(22) #define MTK_WED_RESET_WED BIT(31) #define MTK_WED_CTRL 0x00c @@ -44,6 +48,9 @@ struct mtk_wdma_desc { #define MTK_WED_CTRL_WPDMA_INT_AGENT_BUSY BIT(1) #define MTK_WED_CTRL_WDMA_INT_AGENT_EN BIT(2) #define MTK_WED_CTRL_WDMA_INT_AGENT_BUSY BIT(3) +#define MTK_WED_CTRL_WED_RX_IND_CMD_EN BIT(5) +#define MTK_WED_CTRL_WED_RX_PG_BM_EN BIT(6) +#define MTK_WED_CTRL_WED_RX_PG_BM_BUSY BIT(7) #define MTK_WED_CTRL_WED_TX_BM_EN BIT(8) #define MTK_WED_CTRL_WED_TX_BM_BUSY BIT(9) #define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN BIT(10) @@ -54,9 +61,14 @@ struct mtk_wdma_desc { #define MTK_WED_CTRL_RX_RRO_QM_BUSY BIT(15) #define MTK_WED_CTRL_RX_ROUTE_QM_EN BIT(16) #define MTK_WED_CTRL_RX_ROUTE_QM_BUSY BIT(17) +#define MTK_WED_CTRL_TX_TKID_ALI_EN BIT(20) 
+#define MTK_WED_CTRL_TX_TKID_ALI_BUSY BIT(21) +#define MTK_WED_CTRL_TX_AMSDU_EN BIT(22) +#define MTK_WED_CTRL_TX_AMSDU_BUSY BIT(23) #define MTK_WED_CTRL_FINAL_DIDX_READ BIT(24) #define MTK_WED_CTRL_ETH_DMAD_FMT BIT(25) #define MTK_WED_CTRL_MIB_READ_CLEAR BIT(28) +#define MTK_WED_CTRL_FLD_MIB_RD_CLR BIT(28) #define MTK_WED_EXT_INT_STATUS 0x020 #define MTK_WED_EXT_INT_STATUS_TF_LEN_ERR BIT(0) @@ -89,6 +101,7 @@ struct mtk_wdma_desc { #define MTK_WED_EXT_INT_MASK 0x028 #define MTK_WED_EXT_INT_MASK1 0x02c #define MTK_WED_EXT_INT_MASK2 0x030 +#define MTK_WED_EXT_INT_MASK3 0x034 #define MTK_WED_STATUS 0x060 #define MTK_WED_STATUS_TX GENMASK(15, 8) @@ -96,9 +109,14 @@ struct mtk_wdma_desc { #define MTK_WED_TX_BM_CTRL 0x080 #define MTK_WED_TX_BM_CTRL_VLD_GRP_NUM GENMASK(6, 0) #define MTK_WED_TX_BM_CTRL_RSV_GRP_NUM GENMASK(22, 16) +#define MTK_WED_TX_BM_CTRL_LEGACY_EN BIT(26) +#define MTK_WED_TX_TKID_CTRL_FREE_FORMAT BIT(27) #define MTK_WED_TX_BM_CTRL_PAUSE BIT(28) #define MTK_WED_TX_BM_BASE 0x084 +#define MTK_WED_TX_BM_INIT_PTR 0x088 +#define MTK_WED_TX_BM_SW_TAIL_IDX GENMASK(16, 0) +#define MTK_WED_TX_BM_INIT_SW_TAIL_IDX BIT(16) #define MTK_WED_TX_BM_TKID_START GENMASK(15, 0) #define MTK_WED_TX_BM_TKID_END GENMASK(31, 16) @@ -122,6 +140,9 @@ struct mtk_wdma_desc { #define MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM GENMASK(22, 16) #define MTK_WED_TX_TKID_CTRL_PAUSE BIT(28) +#define MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM_V3 GENMASK(7, 0) +#define MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM_V3 GENMASK(23, 16) + #define MTK_WED_TX_TKID_DYN_THR 0x0e0 #define MTK_WED_TX_TKID_DYN_THR_LO GENMASK(6, 0) #define MTK_WED_TX_TKID_DYN_THR_HI GENMASK(22, 16) @@ -199,12 +220,15 @@ struct mtk_wdma_desc { #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_PKT_PROC BIT(5) #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC BIT(6) #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_CRX_SYNC BIT(7) -#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_VER GENMASK(18, 16) +#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_VER GENMASK(15, 12) 
+#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNS_VER_FORCE_4 BIT(18) #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNSUPPORT_FMT BIT(19) -#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UEVENT_PKT_FMT_CHK BIT(20) +#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_CHK BIT(20) #define MTK_WED_WPDMA_GLO_CFG_RX_DDONE2_WR BIT(21) #define MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP BIT(24) +#define MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK_LAST BIT(25) #define MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV BIT(28) +#define MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK BIT(30) #define MTK_WED_WPDMA_RESET_IDX 0x50c #define MTK_WED_WPDMA_RESET_IDX_TX GENMASK(3, 0) @@ -250,9 +274,10 @@ struct mtk_wdma_desc { #define MTK_WED_PCIE_INT_TRIGGER_STATUS BIT(16) #define MTK_WED_PCIE_INT_CTRL 0x57c -#define MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA BIT(20) -#define MTK_WED_PCIE_INT_CTRL_SRC_SEL GENMASK(17, 16) #define MTK_WED_PCIE_INT_CTRL_POLL_EN GENMASK(13, 12) +#define MTK_WED_PCIE_INT_CTRL_SRC_SEL GENMASK(17, 16) +#define MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA BIT(20) +#define MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER BIT(21) #define MTK_WED_WPDMA_CFG_BASE 0x580 #define MTK_WED_WPDMA_CFG_INT_MASK 0x584 @@ -286,6 +311,20 @@ struct mtk_wdma_desc { #define MTK_WED_WPDMA_RX_D_PROCESSED_MIB(_n) (0x784 + (_n) * 4) #define MTK_WED_WPDMA_RX_D_COHERENT_MIB 0x78c +#define MTK_WED_WPDMA_RX_D_PREF_CFG 0x7b4 +#define MTK_WED_WPDMA_RX_D_PREF_EN BIT(0) +#define MTK_WED_WPDMA_RX_D_PREF_BURST_SIZE GENMASK(12, 8) +#define MTK_WED_WPDMA_RX_D_PREF_LOW_THRES GENMASK(21, 16) + +#define MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX 0x7b8 +#define MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR BIT(15) + +#define MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX 0x7bc + +#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG 0x7c0 +#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R0_CLR BIT(0) +#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R1_CLR BIT(16) + #define MTK_WED_WDMA_RING_TX 0x800 #define MTK_WED_WDMA_TX_MIB 0x810 @@ -293,6 +332,18 @@ struct mtk_wdma_desc { #define MTK_WED_WDMA_RING_RX(_n) (0x900 + (_n) * 0x10) #define 
MTK_WED_WDMA_RX_THRES(_n) (0x940 + (_n) * 0x4) +#define MTK_WED_WDMA_RX_PREF_CFG 0x950 +#define MTK_WED_WDMA_RX_PREF_EN BIT(0) +#define MTK_WED_WDMA_RX_PREF_BURST_SIZE GENMASK(12, 8) +#define MTK_WED_WDMA_RX_PREF_LOW_THRES GENMASK(21, 16) +#define MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR BIT(24) +#define MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR BIT(25) +#define MTK_WED_WDMA_RX_PREF_DDONE2_EN BIT(26) + +#define MTK_WED_WDMA_RX_PREF_FIFO_CFG 0x95C +#define MTK_WED_WDMA_RX_PREF_FIFO_RX0_CLR BIT(0) +#define MTK_WED_WDMA_RX_PREF_FIFO_RX1_CLR BIT(16) + #define MTK_WED_WDMA_GLO_CFG 0xa04 #define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN BIT(0) #define MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK BIT(1) @@ -325,6 +376,7 @@ struct mtk_wdma_desc { #define MTK_WED_WDMA_INT_TRIGGER_RX_DONE GENMASK(17, 16) #define MTK_WED_WDMA_INT_CTRL 0xa2c +#define MTK_WED_WDMA_INT_POLL_PRD GENMASK(7, 0) #define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL GENMASK(17, 16) #define MTK_WED_WDMA_CFG_BASE 0xaa0 @@ -388,6 +440,18 @@ struct mtk_wdma_desc { #define MTK_WDMA_INT_GRP1 0x250 #define MTK_WDMA_INT_GRP2 0x254 +#define MTK_WDMA_PREF_TX_CFG 0x2d0 +#define MTK_WDMA_PREF_TX_CFG_PREF_EN BIT(0) + +#define MTK_WDMA_PREF_RX_CFG 0x2dc +#define MTK_WDMA_PREF_RX_CFG_PREF_EN BIT(0) + +#define MTK_WDMA_WRBK_TX_CFG 0x300 +#define MTK_WDMA_WRBK_TX_CFG_WRBK_EN BIT(30) + +#define MTK_WDMA_WRBK_RX_CFG 0x344 +#define MTK_WDMA_WRBK_RX_CFG_WRBK_EN BIT(30) + #define MTK_PCIE_MIRROR_MAP(n) ((n) ? 
0x4 : 0x0) #define MTK_PCIE_MIRROR_MAP_EN BIT(0) #define MTK_PCIE_MIRROR_MAP_WED_ID BIT(1) @@ -401,6 +465,30 @@ struct mtk_wdma_desc { #define MTK_WED_RTQM_Q_DBG_BYPASS BIT(5) #define MTK_WED_RTQM_TXDMAD_FPORT GENMASK(23, 20) +#define MTK_WED_RTQM_IGRS0_I2HW_DMAD_CNT 0xb1c +#define MTK_WED_RTQM_IGRS0_I2H_DMAD_CNT(_n) (0xb20 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS0_I2HW_PKT_CNT 0xb28 +#define MTK_WED_RTQM_IGRS0_I2H_PKT_CNT(_n) (0xb2c + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS0_FDROP_CNT 0xb34 + +#define MTK_WED_RTQM_IGRS1_I2HW_DMAD_CNT 0xb44 +#define MTK_WED_RTQM_IGRS1_I2H_DMAD_CNT(_n) (0xb48 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS1_I2HW_PKT_CNT 0xb50 +#define MTK_WED_RTQM_IGRS1_I2H_PKT_CNT(_n) (0xb54 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS1_FDROP_CNT 0xb5c + +#define MTK_WED_RTQM_IGRS2_I2HW_DMAD_CNT 0xb6c +#define MTK_WED_RTQM_IGRS2_I2H_DMAD_CNT(_n) (0xb70 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS2_I2HW_PKT_CNT 0xb78 +#define MTK_WED_RTQM_IGRS2_I2H_PKT_CNT(_n) (0xb7c + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS2_FDROP_CNT 0xb84 + +#define MTK_WED_RTQM_IGRS3_I2HW_DMAD_CNT 0xb94 +#define MTK_WED_RTQM_IGRS3_I2H_DMAD_CNT(_n) (0xb98 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS3_I2HW_PKT_CNT 0xba0 +#define MTK_WED_RTQM_IGRS3_I2H_PKT_CNT(_n) (0xba4 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS3_FDROP_CNT 0xbac + #define MTK_WED_RTQM_R2H_MIB(_n) (0xb70 + (_n) * 0x4) #define MTK_WED_RTQM_R2Q_MIB(_n) (0xb78 + (_n) * 0x4) #define MTK_WED_RTQM_Q2N_MIB 0xb80 @@ -409,6 +497,24 @@ struct mtk_wdma_desc { #define MTK_WED_RTQM_Q2B_MIB 0xb8c #define MTK_WED_RTQM_PFDBK_MIB 0xb90 +#define MTK_WED_RTQM_ENQ_CFG0 0xbb8 +#define MTK_WED_RTQM_ENQ_CFG_TXDMAD_FPORT GENMASK(15, 12) + +#define MTK_WED_RTQM_FDROP_MIB 0xb84 +#define MTK_WED_RTQM_ENQ_I2Q_DMAD_CNT 0xbbc +#define MTK_WED_RTQM_ENQ_I2N_DMAD_CNT 0xbc0 +#define MTK_WED_RTQM_ENQ_I2Q_PKT_CNT 0xbc4 +#define MTK_WED_RTQM_ENQ_I2N_PKT_CNT 0xbc8 +#define MTK_WED_RTQM_ENQ_USED_ENTRY_CNT 0xbcc +#define MTK_WED_RTQM_ENQ_ERR_CNT 0xbd0 + +#define 
MTK_WED_RTQM_DEQ_DMAD_CNT 0xbd8 +#define MTK_WED_RTQM_DEQ_Q2I_DMAD_CNT 0xbdc +#define MTK_WED_RTQM_DEQ_PKT_CNT 0xbe0 +#define MTK_WED_RTQM_DEQ_Q2I_PKT_CNT 0xbe4 +#define MTK_WED_RTQM_DEQ_USED_PFDBK_CNT 0xbe8 +#define MTK_WED_RTQM_DEQ_ERR_CNT 0xbec + #define MTK_WED_RROQM_GLO_CFG 0xc04 #define MTK_WED_RROQM_RST_IDX 0xc08 #define MTK_WED_RROQM_RST_IDX_MIOD BIT(0) @@ -458,7 +564,116 @@ struct mtk_wdma_desc { #define MTK_WED_RX_BM_INTF 0xd9c #define MTK_WED_RX_BM_ERR_STS 0xda8 +#define MTK_RRO_IND_CMD_SIGNATURE 0xe00 +#define MTK_RRO_IND_CMD_DMA_IDX GENMASK(11, 0) +#define MTK_RRO_IND_CMD_MAGIC_CNT GENMASK(30, 28) + +#define MTK_WED_IND_CMD_RX_CTRL0 0xe04 +#define MTK_WED_IND_CMD_PROC_IDX GENMASK(11, 0) +#define MTK_WED_IND_CMD_PREFETCH_FREE_CNT GENMASK(19, 16) +#define MTK_WED_IND_CMD_MAGIC_CNT GENMASK(30, 28) + +#define MTK_WED_IND_CMD_RX_CTRL1 0xe08 +#define MTK_WED_IND_CMD_RX_CTRL2 0xe0c +#define MTK_WED_IND_CMD_MAX_CNT GENMASK(11, 0) +#define MTK_WED_IND_CMD_BASE_M GENMASK(19, 16) + +#define MTK_WED_RRO_CFG0 0xe10 +#define MTK_WED_RRO_CFG1 0xe14 +#define MTK_WED_RRO_CFG1_MAX_WIN_SZ GENMASK(31, 29) +#define MTK_WED_RRO_CFG1_ACK_SN_BASE_M GENMASK(19, 16) +#define MTK_WED_RRO_CFG1_PARTICL_SE_ID GENMASK(11, 0) + +#define MTK_WED_ADDR_ELEM_CFG0 0xe18 +#define MTK_WED_ADDR_ELEM_CFG1 0xe1c +#define MTK_WED_ADDR_ELEM_PREFETCH_FREE_CNT GENMASK(19, 16) + +#define MTK_WED_ADDR_ELEM_TBL_CFG 0xe20 +#define MTK_WED_ADDR_ELEM_TBL_OFFSET GENMASK(6, 0) +#define MTK_WED_ADDR_ELEM_TBL_RD_RDY BIT(28) +#define MTK_WED_ADDR_ELEM_TBL_WR_RDY BIT(29) +#define MTK_WED_ADDR_ELEM_TBL_RD BIT(30) +#define MTK_WED_ADDR_ELEM_TBL_WR BIT(31) + +#define MTK_WED_RADDR_ELEM_TBL_WDATA 0xe24 +#define MTK_WED_RADDR_ELEM_TBL_RDATA 0xe28 + +#define MTK_WED_PN_CHECK_CFG 0xe30 +#define MTK_WED_PN_CHECK_SE_ID GENMASK(11, 0) +#define MTK_WED_PN_CHECK_RD_RDY BIT(28) +#define MTK_WED_PN_CHECK_WR_RDY BIT(29) +#define MTK_WED_PN_CHECK_RD BIT(30) +#define MTK_WED_PN_CHECK_WR BIT(31) + +#define 
MTK_WED_PN_CHECK_WDATA_M 0xe38 +#define MTK_WED_PN_CHECK_IS_FIRST BIT(17) + +#define MTK_WED_RRO_MSDU_PG_RING_CFG(_n) (0xe44 + (_n) * 0x8) + +#define MTK_WED_RRO_MSDU_PG_RING2_CFG 0xe58 +#define MTK_WED_RRO_MSDU_PG_DRV_CLR BIT(26) +#define MTK_WED_RRO_MSDU_PG_DRV_EN BIT(31) + +#define MTK_WED_RRO_MSDU_PG_CTRL0(_n) (0xe5c + (_n) * 0xc) +#define MTK_WED_RRO_MSDU_PG_CTRL1(_n) (0xe60 + (_n) * 0xc) +#define MTK_WED_RRO_MSDU_PG_CTRL2(_n) (0xe64 + (_n) * 0xc) + +#define MTK_WED_RRO_RX_D_RX(_n) (0xe80 + (_n) * 0x10) + +#define MTK_WED_RRO_RX_MAGIC_CNT BIT(13) + +#define MTK_WED_RRO_RX_D_CFG(_n) (0xea0 + (_n) * 0x4) +#define MTK_WED_RRO_RX_D_DRV_CLR BIT(26) +#define MTK_WED_RRO_RX_D_DRV_EN BIT(31) + +#define MTK_WED_RRO_PG_BM_RX_DMAM 0xeb0 +#define MTK_WED_RRO_PG_BM_RX_SDL0 GENMASK(13, 0) + +#define MTK_WED_RRO_PG_BM_BASE 0xeb4 +#define MTK_WED_RRO_PG_BM_INIT_PTR 0xeb8 +#define MTK_WED_RRO_PG_BM_SW_TAIL_IDX GENMASK(15, 0) +#define MTK_WED_RRO_PG_BM_INIT_SW_TAIL_IDX BIT(16) + +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX 0xeec +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_EN BIT(0) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_CLR BIT(1) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_DONE_TRIG GENMASK(6, 2) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_EN BIT(8) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_CLR BIT(9) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_DONE_TRIG GENMASK(14, 10) + +#define MTK_WED_WPDMA_INT_CTRL_RRO_MSDU_PG 0xef4 +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_EN BIT(0) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_CLR BIT(1) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_DONE_TRIG GENMASK(6, 2) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_EN BIT(8) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_CLR BIT(9) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_DONE_TRIG GENMASK(14, 10) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_EN BIT(16) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_CLR BIT(17) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_DONE_TRIG GENMASK(22, 18) + +#define MTK_WED_RX_IND_CMD_CNT0 0xf20 +#define MTK_WED_RX_IND_CMD_DBG_CNT_EN BIT(31) 
+ +#define MTK_WED_RX_IND_CMD_CNT(_n) (0xf20 + (_n) * 0x4) +#define MTK_WED_IND_CMD_MAGIC_CNT_FAIL_CNT GENMASK(15, 0) + +#define MTK_WED_RX_ADDR_ELEM_CNT(_n) (0xf48 + (_n) * 0x4) +#define MTK_WED_ADDR_ELEM_SIG_FAIL_CNT GENMASK(15, 0) +#define MTK_WED_ADDR_ELEM_FIRST_SIG_FAIL_CNT GENMASK(31, 16) +#define MTK_WED_ADDR_ELEM_ACKSN_CNT GENMASK(27, 0) + +#define MTK_WED_RX_MSDU_PG_CNT(_n) (0xf5c + (_n) * 0x4) + +#define MTK_WED_RX_PN_CHK_CNT 0xf70 +#define MTK_WED_PN_CHK_FAIL_CNT GENMASK(15, 0) + #define MTK_WED_WOCPU_VIEW_MIOD_BASE 0x8000 #define MTK_WED_PCIE_INT_MASK 0x0 +#define MTK_WED_PCIE_BASE 0x11280000 +#define MTK_WED_PCIE_BASE0 0x11300000 +#define MTK_WED_PCIE_BASE1 0x11310000 +#define MTK_WED_PCIE_BASE2 0x11290000 #endif diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.h b/drivers/net/ethernet/mediatek/mtk_wed_wo.h index 8ed81761bf10..87a67fa3868d 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.h +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.h @@ -91,6 +91,8 @@ enum mtk_wed_dummy_cr_idx { #define MT7981_FIRMWARE_WO "mediatek/mt7981_wo.bin" #define MT7986_FIRMWARE_WO0 "mediatek/mt7986_wo_0.bin" #define MT7986_FIRMWARE_WO1 "mediatek/mt7986_wo_1.bin" +#define MT7988_FIRMWARE_WO0 "mediatek/mt7988_wo_0.bin" +#define MT7988_FIRMWARE_WO1 "mediatek/mt7988_wo_1.bin" #define MTK_WO_MCU_CFG_LS_BASE 0 #define MTK_WO_MCU_CFG_LS_HW_VER_ADDR (MTK_WO_MCU_CFG_LS_BASE + 0x000) diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 5f00dc26582b..5b096f9f1975 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -138,6 +138,8 @@ struct mtk_wed_device { u32 wpdma_rx; bool wcid_512; + bool hw_rro; + bool msi; u16 token_start; unsigned int nbuf; @@ -211,10 +213,12 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) return ret; } -static inline bool -mtk_wed_get_rx_capa(struct mtk_wed_device *dev) +static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev) { #ifdef 
CONFIG_NET_MEDIATEK_SOC_WED + if (dev->version == 3) + return dev->wlan.hw_rro; + return dev->version != 1; #else return false; -- cgit v1.2.3 From b230812b9dda125e69ab0a5a11cda88d9c0d18a9 Mon Sep 17 00:00:00 2001 From: Sujuan Chen Date: Mon, 18 Sep 2023 12:29:15 +0200 Subject: net: ethernet: mtk_wed: introduce partial AMSDU offload support for MT7988 Introduce partial AMSDU offload support for MT7988 SoC in order to merge in hw packets belonging to the same AMSDU before passing them to the WLAN nic. Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Sujuan Chen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_ppe.c | 4 +- drivers/net/ethernet/mediatek/mtk_ppe.h | 19 +-- drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 3 +- drivers/net/ethernet/mediatek/mtk_wed.c | 154 +++++++++++++++++++++--- drivers/net/ethernet/mediatek/mtk_wed.h | 7 ++ drivers/net/ethernet/mediatek/mtk_wed_regs.h | 76 ++++++++++++ include/linux/netdevice.h | 1 + include/linux/soc/mediatek/mtk_wed.h | 12 ++ 8 files changed, 248 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 86f32f486043..b2a5d9c3733d 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -425,7 +425,8 @@ int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry, } int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry, - int wdma_idx, int txq, int bss, int wcid) + int wdma_idx, int txq, int bss, int wcid, + bool amsdu_en) { struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(eth, entry); u32 *ib2 = mtk_foe_entry_ib2(eth, entry); @@ -437,6 +438,7 @@ int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry, MTK_FOE_IB2_WDMA_WINFO_V2; l2->w3info = FIELD_PREP(MTK_FOE_WINFO_WCID_V3, wcid) | FIELD_PREP(MTK_FOE_WINFO_BSS_V3, bss); + l2->amsdu = FIELD_PREP(MTK_FOE_WINFO_AMSDU_EN, 
amsdu_en); break; case 2: *ib2 &= ~MTK_FOE_IB2_PORT_MG_V2; diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index e3d0ec72bc69..691806bca372 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -88,13 +88,13 @@ enum { #define MTK_FOE_WINFO_BSS_V3 GENMASK(23, 16) #define MTK_FOE_WINFO_WCID_V3 GENMASK(15, 0) -#define MTK_FOE_WINFO_PAO_USR_INFO GENMASK(15, 0) -#define MTK_FOE_WINFO_PAO_TID GENMASK(19, 16) -#define MTK_FOE_WINFO_PAO_IS_FIXEDRATE BIT(20) -#define MTK_FOE_WINFO_PAO_IS_PRIOR BIT(21) -#define MTK_FOE_WINFO_PAO_IS_SP BIT(22) -#define MTK_FOE_WINFO_PAO_HF BIT(23) -#define MTK_FOE_WINFO_PAO_AMSDU_EN BIT(24) +#define MTK_FOE_WINFO_AMSDU_USR_INFO GENMASK(15, 0) +#define MTK_FOE_WINFO_AMSDU_TID GENMASK(19, 16) +#define MTK_FOE_WINFO_AMSDU_IS_FIXEDRATE BIT(20) +#define MTK_FOE_WINFO_AMSDU_IS_PRIOR BIT(21) +#define MTK_FOE_WINFO_AMSDU_IS_SP BIT(22) +#define MTK_FOE_WINFO_AMSDU_HF BIT(23) +#define MTK_FOE_WINFO_AMSDU_EN BIT(24) enum { MTK_FOE_STATE_INVALID, @@ -123,7 +123,7 @@ struct mtk_foe_mac_info { /* netsys_v3 */ u32 w3info; - u32 wpao; + u32 amsdu; }; /* software-only entry type */ @@ -392,7 +392,8 @@ int mtk_foe_entry_set_vlan(struct mtk_eth *eth, struct mtk_foe_entry *entry, int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry, int sid); int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry, - int wdma_idx, int txq, int bss, int wcid); + int wdma_idx, int txq, int bss, int wcid, + bool amsdu_en); int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry, unsigned int queue); int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index 95f76975f258..e073d2b5542c 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ 
b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -111,6 +111,7 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i info->queue = path->mtk_wdma.queue; info->bss = path->mtk_wdma.bss; info->wcid = path->mtk_wdma.wcid; + info->amsdu = path->mtk_wdma.amsdu; return 0; } @@ -192,7 +193,7 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) { mtk_foe_entry_set_wdma(eth, foe, info.wdma_idx, info.queue, - info.bss, info.wcid); + info.bss, info.wcid, info.amsdu); if (mtk_is_netsys_v2_or_greater(eth)) { switch (info.wdma_idx) { case 0: diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 18cbf028f6ed..d4b41ccfbad5 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -30,6 +30,8 @@ #define MTK_WED_RX_PAGE_BUF_PER_PAGE (PAGE_SIZE / 128) #define MTK_WED_RX_RING_SIZE 1536 #define MTK_WED_RX_PG_BM_CNT 8192 +#define MTK_WED_AMSDU_BUF_SIZE (PAGE_SIZE << 4) +#define MTK_WED_AMSDU_NPAGES 32 #define MTK_WED_TX_RING_SIZE 2048 #define MTK_WED_WDMA_RING_SIZE 1024 @@ -173,6 +175,23 @@ mtk_wdma_rx_reset(struct mtk_wed_device *dev) return ret; } +static u32 +mtk_wed_check_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + return !!(wed_r32(dev, reg) & mask); +} + +static int +mtk_wed_poll_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + int sleep = 15000; + int timeout = 100 * sleep; + u32 val; + + return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep, + timeout, false, dev, reg, mask); +} + static void mtk_wdma_tx_reset(struct mtk_wed_device *dev) { @@ -335,6 +354,118 @@ out: return hw; } +static int +mtk_wed_amsdu_buffer_alloc(struct mtk_wed_device *dev) +{ + struct mtk_wed_hw *hw = dev->hw; + struct mtk_wed_amsdu *wed_amsdu; + int i; + + if (!mtk_wed_is_v3_or_greater(hw)) + return 0; + + wed_amsdu = devm_kcalloc(hw->dev, MTK_WED_AMSDU_NPAGES, + 
sizeof(*wed_amsdu), GFP_KERNEL); + if (!wed_amsdu) + return -ENOMEM; + + for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) { + void *ptr; + + /* each segment is 64K */ + ptr = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | + __GFP_ZERO | __GFP_COMP | + GFP_DMA32, + get_order(MTK_WED_AMSDU_BUF_SIZE)); + if (!ptr) + goto error; + + wed_amsdu[i].txd = ptr; + wed_amsdu[i].txd_phy = dma_map_single(hw->dev, ptr, + MTK_WED_AMSDU_BUF_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(hw->dev, wed_amsdu[i].txd_phy)) + goto error; + } + dev->hw->wed_amsdu = wed_amsdu; + + return 0; + +error: + for (i--; i >= 0; i--) + dma_unmap_single(hw->dev, wed_amsdu[i].txd_phy, + MTK_WED_AMSDU_BUF_SIZE, DMA_TO_DEVICE); + return -ENOMEM; +} + +static void +mtk_wed_amsdu_free_buffer(struct mtk_wed_device *dev) +{ + struct mtk_wed_amsdu *wed_amsdu = dev->hw->wed_amsdu; + int i; + + if (!wed_amsdu) + return; + + for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) { + dma_unmap_single(dev->hw->dev, wed_amsdu[i].txd_phy, + MTK_WED_AMSDU_BUF_SIZE, DMA_TO_DEVICE); + free_pages((unsigned long)wed_amsdu[i].txd, + get_order(MTK_WED_AMSDU_BUF_SIZE)); + } +} + +static int +mtk_wed_amsdu_init(struct mtk_wed_device *dev) +{ + struct mtk_wed_amsdu *wed_amsdu = dev->hw->wed_amsdu; + int i, ret; + + if (!wed_amsdu) + return 0; + + for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) + wed_w32(dev, MTK_WED_AMSDU_HIFTXD_BASE_L(i), + wed_amsdu[i].txd_phy); + + /* init all sta parameter */ + wed_w32(dev, MTK_WED_AMSDU_STA_INFO_INIT, MTK_WED_AMSDU_STA_RMVL | + MTK_WED_AMSDU_STA_WTBL_HDRT_MODE | + FIELD_PREP(MTK_WED_AMSDU_STA_MAX_AMSDU_LEN, + dev->wlan.amsdu_max_len >> 8) | + FIELD_PREP(MTK_WED_AMSDU_STA_MAX_AMSDU_NUM, + dev->wlan.amsdu_max_subframes)); + + wed_w32(dev, MTK_WED_AMSDU_STA_INFO, MTK_WED_AMSDU_STA_INFO_DO_INIT); + + ret = mtk_wed_poll_busy(dev, MTK_WED_AMSDU_STA_INFO, + MTK_WED_AMSDU_STA_INFO_DO_INIT); + if (ret) { + dev_err(dev->hw->dev, "amsdu initialization failed\n"); + return ret; + } + + /* init partial amsdu 
offload txd src */ + wed_set(dev, MTK_WED_AMSDU_HIFTXD_CFG, + FIELD_PREP(MTK_WED_AMSDU_HIFTXD_SRC, dev->hw->index)); + + /* init qmem */ + wed_set(dev, MTK_WED_AMSDU_PSE, MTK_WED_AMSDU_PSE_RESET); + ret = mtk_wed_poll_busy(dev, MTK_WED_MON_AMSDU_QMEM_STS1, BIT(29)); + if (ret) { + pr_info("%s: amsdu qmem initialization failed\n", __func__); + return ret; + } + + /* eagle E1 PCIE1 tx ring 22 flow control issue */ + if (dev->wlan.id == 0x7991) + wed_clr(dev, MTK_WED_AMSDU_FIFO, MTK_WED_AMSDU_IS_PRIOR0_RING); + + wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN); + + return 0; +} + static int mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) { @@ -709,6 +840,7 @@ __mtk_wed_detach(struct mtk_wed_device *dev) mtk_wdma_rx_reset(dev); mtk_wed_reset(dev, MTK_WED_RESET_WED); + mtk_wed_amsdu_free_buffer(dev); mtk_wed_free_tx_buffer(dev); mtk_wed_free_tx_rings(dev); @@ -1129,23 +1261,6 @@ mtk_wed_ring_reset(struct mtk_wed_ring *ring, int size, bool tx) } } -static u32 -mtk_wed_check_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) -{ - return !!(wed_r32(dev, reg) & mask); -} - -static int -mtk_wed_poll_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) -{ - int sleep = 15000; - int timeout = 100 * sleep; - u32 val; - - return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep, - timeout, false, dev, reg, mask); -} - static int mtk_wed_rx_reset(struct mtk_wed_device *dev) { @@ -1692,6 +1807,7 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) } mtk_wed_set_512_support(dev, dev->wlan.wcid_512); + mtk_wed_amsdu_init(dev); mtk_wed_dma_enable(dev); dev->running = true; @@ -1748,6 +1864,10 @@ mtk_wed_attach(struct mtk_wed_device *dev) if (ret) goto out; + ret = mtk_wed_amsdu_buffer_alloc(dev); + if (ret) + goto out; + if (mtk_wed_get_rx_capa(dev)) { ret = mtk_wed_rro_alloc(dev); if (ret) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h index 27d336db4d4d..c1f0479d7a71 100644 --- 
a/drivers/net/ethernet/mediatek/mtk_wed.h +++ b/drivers/net/ethernet/mediatek/mtk_wed.h @@ -25,6 +25,11 @@ struct mtk_wed_soc_data { u32 wdma_desc_size; }; +struct mtk_wed_amsdu { + void *txd; + dma_addr_t txd_phy; +}; + struct mtk_wed_hw { const struct mtk_wed_soc_data *soc; struct device_node *node; @@ -38,6 +43,7 @@ struct mtk_wed_hw { struct dentry *debugfs_dir; struct mtk_wed_device *wed_dev; struct mtk_wed_wo *wed_wo; + struct mtk_wed_amsdu *wed_amsdu; u32 pcie_base; u32 debugfs_reg; u32 num_flows; @@ -52,6 +58,7 @@ struct mtk_wdma_info { u8 queue; u16 wcid; u8 bss; + u8 amsdu; }; #ifdef CONFIG_NET_MEDIATEK_SOC_WED diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h index a4d3cf64d090..5a7e4a11a54e 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_regs.h +++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h @@ -672,6 +672,82 @@ struct mtk_wdma_desc { #define MTK_WED_WOCPU_VIEW_MIOD_BASE 0x8000 #define MTK_WED_PCIE_INT_MASK 0x0 +#define MTK_WED_AMSDU_FIFO 0x1800 +#define MTK_WED_AMSDU_IS_PRIOR0_RING BIT(10) + +#define MTK_WED_AMSDU_STA_INFO 0x01810 +#define MTK_WED_AMSDU_STA_INFO_DO_INIT BIT(0) +#define MTK_WED_AMSDU_STA_INFO_SET_INIT BIT(1) + +#define MTK_WED_AMSDU_STA_INFO_INIT 0x01814 +#define MTK_WED_AMSDU_STA_WTBL_HDRT_MODE BIT(0) +#define MTK_WED_AMSDU_STA_RMVL BIT(1) +#define MTK_WED_AMSDU_STA_MAX_AMSDU_LEN GENMASK(7, 2) +#define MTK_WED_AMSDU_STA_MAX_AMSDU_NUM GENMASK(11, 8) + +#define MTK_WED_AMSDU_HIFTXD_BASE_L(_n) (0x1980 + (_n) * 0x4) + +#define MTK_WED_AMSDU_PSE 0x1910 +#define MTK_WED_AMSDU_PSE_RESET BIT(16) + +#define MTK_WED_AMSDU_HIFTXD_CFG 0x1968 +#define MTK_WED_AMSDU_HIFTXD_SRC GENMASK(16, 15) + +#define MTK_WED_MON_AMSDU_FIFO_DMAD 0x1a34 + +#define MTK_WED_MON_AMSDU_ENG_DMAD(_n) (0x1a80 + (_n) * 0x50) +#define MTK_WED_MON_AMSDU_ENG_QFPL(_n) (0x1a84 + (_n) * 0x50) +#define MTK_WED_MON_AMSDU_ENG_QENI(_n) (0x1a88 + (_n) * 0x50) +#define MTK_WED_MON_AMSDU_ENG_QENO(_n) (0x1a8c + (_n) * 0x50) 
+#define MTK_WED_MON_AMSDU_ENG_MERG(_n) (0x1a90 + (_n) * 0x50) + +#define MTK_WED_MON_AMSDU_ENG_CNT8(_n) (0x1a94 + (_n) * 0x50) +#define MTK_WED_AMSDU_ENG_MAX_QGPP_CNT GENMASK(10, 0) +#define MTK_WED_AMSDU_ENG_MAX_PL_CNT GENMASK(27, 16) + +#define MTK_WED_MON_AMSDU_ENG_CNT9(_n) (0x1a98 + (_n) * 0x50) +#define MTK_WED_AMSDU_ENG_CUR_ENTRY GENMASK(10, 0) +#define MTK_WED_AMSDU_ENG_MAX_BUF_MERGED GENMASK(20, 16) +#define MTK_WED_AMSDU_ENG_MAX_MSDU_MERGED GENMASK(28, 24) + +#define MTK_WED_MON_AMSDU_QMEM_STS1 0x1e04 + +#define MTK_WED_MON_AMSDU_QMEM_CNT(_n) (0x1e0c + (_n) * 0x4) +#define MTK_WED_AMSDU_QMEM_FQ_CNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_SP_QCNT GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID0_QCNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID1_QCNT GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID2_QCNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID3_QCNT GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID4_QCNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID5_QCNT GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID6_QCNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID7_QCNT GENMASK(11, 0) + +#define MTK_WED_MON_AMSDU_QMEM_PTR(_n) (0x1e20 + (_n) * 0x4) +#define MTK_WED_AMSDU_QMEM_FQ_HEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_SP_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID0_QHEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID1_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID2_QHEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID3_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID4_QHEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID5_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID6_QHEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID7_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_FQ_TAIL GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_SP_QTAIL GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID0_QTAIL GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID1_QTAIL GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID2_QTAIL GENMASK(27, 16) +#define 
MTK_WED_AMSDU_QMEM_TID3_QTAIL GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID4_QTAIL GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID5_QTAIL GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID6_QTAIL GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID7_QTAIL GENMASK(11, 0) + +#define MTK_WED_MON_AMSDU_HIFTXD_FETCH_MSDU(_n) (0x1ec4 + (_n) * 0x4) + #define MTK_WED_PCIE_BASE 0x11280000 #define MTK_WED_PCIE_BASE0 0x11300000 #define MTK_WED_PCIE_BASE1 0x11310000 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index db3d8429d50d..7e520c14eb8c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -919,6 +919,7 @@ struct net_device_path { u8 queue; u16 wcid; u8 bss; + u8 amsdu; } mtk_wdma; }; }; diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 5b096f9f1975..90d9c9ead3bc 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -128,6 +128,7 @@ struct mtk_wed_device { enum mtk_wed_bus_tye bus_type; void __iomem *base; u32 phy_base; + u32 id; u32 wpdma_phys; u32 wpdma_int; @@ -146,10 +147,12 @@ struct mtk_wed_device { unsigned int rx_nbuf; unsigned int rx_npkt; unsigned int rx_size; + unsigned int amsdu_max_len; u8 tx_tbit[MTK_WED_TX_QUEUES]; u8 rx_tbit[MTK_WED_RX_QUEUES]; u8 txfree_tbit; + u8 amsdu_max_subframes; u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); @@ -225,6 +228,15 @@ static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev) #endif } +static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev) +{ +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + return dev->version == 3; +#else + return false; +#endif +} + #ifdef CONFIG_NET_MEDIATEK_SOC_WED #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) -- cgit v1.2.3 From 6757d345dd7dba795f5af44d4442d55a83c4b1b4 Mon Sep 17 00:00:00 2001 From: Sujuan Chen Date: Mon, 18 Sep 2023 
12:29:16 +0200 Subject: net: ethernet: mtk_wed: introduce hw_rro support for MT7988 MT7988 SoC support 802.11 receive reordering offload in hw while MT7986 SoC implements it through the firmware running on the mcu. Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Sujuan Chen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_wed.c | 299 +++++++++++++++++++++++++++++++- include/linux/soc/mediatek/mtk_wed.h | 45 +++++ 2 files changed, 342 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index d4b41ccfbad5..2a0be1f2d43e 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -27,7 +27,7 @@ #define MTK_WED_BUF_SIZE 2048 #define MTK_WED_PAGE_BUF_SIZE 128 #define MTK_WED_BUF_PER_PAGE (PAGE_SIZE / 2048) -#define MTK_WED_RX_PAGE_BUF_PER_PAGE (PAGE_SIZE / 128) +#define MTK_WED_RX_BUF_PER_PAGE (PAGE_SIZE / MTK_WED_PAGE_BUF_SIZE) #define MTK_WED_RX_RING_SIZE 1536 #define MTK_WED_RX_PG_BM_CNT 8192 #define MTK_WED_AMSDU_BUF_SIZE (PAGE_SIZE << 4) @@ -596,6 +596,68 @@ free_pagelist: kfree(page_list); } +static int +mtk_wed_hwrro_buffer_alloc(struct mtk_wed_device *dev) +{ + int n_pages = MTK_WED_RX_PG_BM_CNT / MTK_WED_RX_BUF_PER_PAGE; + struct mtk_wed_buf *page_list; + struct mtk_wed_bm_desc *desc; + dma_addr_t desc_phys; + int i, page_idx = 0; + + if (!dev->wlan.hw_rro) + return 0; + + page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + dev->hw_rro.size = dev->wlan.rx_nbuf & ~(MTK_WED_BUF_PER_PAGE - 1); + dev->hw_rro.pages = page_list; + desc = dma_alloc_coherent(dev->hw->dev, + dev->wlan.rx_nbuf * sizeof(*desc), + &desc_phys, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + dev->hw_rro.desc = desc; + dev->hw_rro.desc_phys = desc_phys; + + for (i = 0; i < MTK_WED_RX_PG_BM_CNT; i += MTK_WED_RX_BUF_PER_PAGE) { + dma_addr_t page_phys, 
buf_phys; + struct page *page; + int s; + + page = __dev_alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + page_phys = dma_map_page(dev->hw->dev, page, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev->hw->dev, page_phys)) { + __free_page(page); + return -ENOMEM; + } + + page_list[page_idx].p = page; + page_list[page_idx++].phy_addr = page_phys; + dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + buf_phys = page_phys; + for (s = 0; s < MTK_WED_RX_BUF_PER_PAGE; s++) { + desc->buf0 = cpu_to_le32(buf_phys); + buf_phys += MTK_WED_PAGE_BUF_SIZE; + desc++; + } + + dma_sync_single_for_device(dev->hw->dev, page_phys, PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + + return 0; +} + static int mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev) { @@ -613,7 +675,42 @@ mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev) dev->rx_buf_ring.desc_phys = desc_phys; dev->wlan.init_rx_buf(dev, dev->wlan.rx_npkt); - return 0; + return mtk_wed_hwrro_buffer_alloc(dev); +} + +static void +mtk_wed_hwrro_free_buffer(struct mtk_wed_device *dev) +{ + struct mtk_wed_buf *page_list = dev->hw_rro.pages; + struct mtk_wed_bm_desc *desc = dev->hw_rro.desc; + int i, page_idx = 0; + + if (!dev->wlan.hw_rro) + return; + + if (!page_list) + return; + + if (!desc) + goto free_pagelist; + + for (i = 0; i < MTK_WED_RX_PG_BM_CNT; i += MTK_WED_RX_BUF_PER_PAGE) { + dma_addr_t buf_addr = page_list[page_idx].phy_addr; + void *page = page_list[page_idx++].p; + + if (!page) + break; + + dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + __free_page(page); + } + + dma_free_coherent(dev->hw->dev, dev->hw_rro.size * sizeof(*desc), + desc, dev->hw_rro.desc_phys); + +free_pagelist: + kfree(page_list); } static void @@ -627,6 +724,28 @@ mtk_wed_free_rx_buffer(struct mtk_wed_device *dev) dev->wlan.release_rx_buf(dev); dma_free_coherent(dev->hw->dev, dev->rx_buf_ring.size * sizeof(*desc), desc, dev->rx_buf_ring.desc_phys); + + 
mtk_wed_hwrro_free_buffer(dev); +} + +static void +mtk_wed_hwrro_init(struct mtk_wed_device *dev) +{ + if (!mtk_wed_get_rx_capa(dev) || !dev->wlan.hw_rro) + return; + + wed_set(dev, MTK_WED_RRO_PG_BM_RX_DMAM, + FIELD_PREP(MTK_WED_RRO_PG_BM_RX_SDL0, 128)); + + wed_w32(dev, MTK_WED_RRO_PG_BM_BASE, dev->hw_rro.desc_phys); + + wed_w32(dev, MTK_WED_RRO_PG_BM_INIT_PTR, + MTK_WED_RRO_PG_BM_INIT_SW_TAIL_IDX | + FIELD_PREP(MTK_WED_RRO_PG_BM_SW_TAIL_IDX, + MTK_WED_RX_PG_BM_CNT)); + + /* enable rx_page_bm to fetch dmad */ + wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_PG_BM_EN); } static void @@ -640,6 +759,8 @@ mtk_wed_rx_buffer_hw_init(struct mtk_wed_device *dev) wed_w32(dev, MTK_WED_RX_BM_DYN_ALLOC_TH, FIELD_PREP(MTK_WED_RX_BM_DYN_ALLOC_TH_H, 0xffff)); wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_BM_EN); + + mtk_wed_hwrro_init(dev); } static void @@ -935,6 +1056,8 @@ mtk_wed_bus_init(struct mtk_wed_device *dev) static void mtk_wed_set_wpdma(struct mtk_wed_device *dev) { + int i; + if (mtk_wed_is_v1(dev->hw)) { wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys); return; @@ -952,6 +1075,15 @@ mtk_wed_set_wpdma(struct mtk_wed_device *dev) wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo); wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring0, dev->wlan.wpdma_rx); + + if (!dev->wlan.hw_rro) + return; + + wed_w32(dev, MTK_WED_RRO_RX_D_CFG(0), dev->wlan.wpdma_rx_rro[0]); + wed_w32(dev, MTK_WED_RRO_RX_D_CFG(1), dev->wlan.wpdma_rx_rro[1]); + for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) + wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING_CFG(i), + dev->wlan.wpdma_rx_pg + i * 0x10); } static void @@ -1762,6 +1894,165 @@ mtk_wed_dma_enable(struct mtk_wed_device *dev) } } +static void +mtk_wed_start_hw_rro(struct mtk_wed_device *dev, u32 irq_mask, bool reset) +{ + int i; + + wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask); + wed_w32(dev, MTK_WED_INT_MASK, irq_mask); + + if (!mtk_wed_get_rx_capa(dev) || !dev->wlan.hw_rro) + return; + + wed_set(dev, MTK_WED_RRO_RX_D_CFG(2), 
MTK_WED_RRO_MSDU_PG_DRV_CLR); + wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG, + MTK_WED_RRO_MSDU_PG_DRV_CLR); + + wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RRO_RX, + MTK_WED_WPDMA_INT_CTRL_RRO_RX0_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_RX0_CLR | + MTK_WED_WPDMA_INT_CTRL_RRO_RX1_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_RX1_CLR | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_RX0_DONE_TRIG, + dev->wlan.rro_rx_tbit[0]) | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_RX1_DONE_TRIG, + dev->wlan.rro_rx_tbit[1])); + + wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RRO_MSDU_PG, + MTK_WED_WPDMA_INT_CTRL_RRO_PG0_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_PG0_CLR | + MTK_WED_WPDMA_INT_CTRL_RRO_PG1_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_PG1_CLR | + MTK_WED_WPDMA_INT_CTRL_RRO_PG2_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_PG2_CLR | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG0_DONE_TRIG, + dev->wlan.rx_pg_tbit[0]) | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG1_DONE_TRIG, + dev->wlan.rx_pg_tbit[1]) | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG2_DONE_TRIG, + dev->wlan.rx_pg_tbit[2])); + + /* RRO_MSDU_PG_RING2_CFG1_FLD_DRV_EN should be enabled after + * WM FWDL completed, otherwise RRO_MSDU_PG ring may broken + */ + wed_set(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG, + MTK_WED_RRO_MSDU_PG_DRV_EN); + + for (i = 0; i < MTK_WED_RX_QUEUES; i++) { + struct mtk_wed_ring *ring = &dev->rx_rro_ring[i]; + + if (!(ring->flags & MTK_WED_RING_CONFIGURED)) + continue; + + if (mtk_wed_check_wfdma_rx_fill(dev, ring)) + dev_err(dev->hw->dev, + "rx_rro_ring(%d) initialization failed\n", i); + } + + for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) { + struct mtk_wed_ring *ring = &dev->rx_page_ring[i]; + + if (!(ring->flags & MTK_WED_RING_CONFIGURED)) + continue; + + if (mtk_wed_check_wfdma_rx_fill(dev, ring)) + dev_err(dev->hw->dev, + "rx_page_ring(%d) initialization failed\n", i); + } +} + +static void +mtk_wed_rro_rx_ring_setup(struct mtk_wed_device *dev, int idx, + void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->rx_rro_ring[idx]; + + ring->wpdma = regs; + 
wed_w32(dev, MTK_WED_RRO_RX_D_RX(idx) + MTK_WED_RING_OFS_BASE, + readl(regs)); + wed_w32(dev, MTK_WED_RRO_RX_D_RX(idx) + MTK_WED_RING_OFS_COUNT, + readl(regs + MTK_WED_RING_OFS_COUNT)); + ring->flags |= MTK_WED_RING_CONFIGURED; +} + +static void +mtk_wed_msdu_pg_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->rx_page_ring[idx]; + + ring->wpdma = regs; + wed_w32(dev, MTK_WED_RRO_MSDU_PG_CTRL0(idx) + MTK_WED_RING_OFS_BASE, + readl(regs)); + wed_w32(dev, MTK_WED_RRO_MSDU_PG_CTRL0(idx) + MTK_WED_RING_OFS_COUNT, + readl(regs + MTK_WED_RING_OFS_COUNT)); + ring->flags |= MTK_WED_RING_CONFIGURED; +} + +static int +mtk_wed_ind_rx_ring_setup(struct mtk_wed_device *dev, void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->ind_cmd_ring; + u32 val = readl(regs + MTK_WED_RING_OFS_COUNT); + int i, count = 0; + + ring->wpdma = regs; + wed_w32(dev, MTK_WED_IND_CMD_RX_CTRL1 + MTK_WED_RING_OFS_BASE, + readl(regs) & 0xfffffff0); + + wed_w32(dev, MTK_WED_IND_CMD_RX_CTRL1 + MTK_WED_RING_OFS_COUNT, + readl(regs + MTK_WED_RING_OFS_COUNT)); + + /* ack sn cr */ + wed_w32(dev, MTK_WED_RRO_CFG0, dev->wlan.phy_base + + dev->wlan.ind_cmd.ack_sn_addr); + wed_w32(dev, MTK_WED_RRO_CFG1, + FIELD_PREP(MTK_WED_RRO_CFG1_MAX_WIN_SZ, + dev->wlan.ind_cmd.win_size) | + FIELD_PREP(MTK_WED_RRO_CFG1_PARTICL_SE_ID, + dev->wlan.ind_cmd.particular_sid)); + + /* particular session addr element */ + wed_w32(dev, MTK_WED_ADDR_ELEM_CFG0, + dev->wlan.ind_cmd.particular_se_phys); + + for (i = 0; i < dev->wlan.ind_cmd.se_group_nums; i++) { + wed_w32(dev, MTK_WED_RADDR_ELEM_TBL_WDATA, + dev->wlan.ind_cmd.addr_elem_phys[i] >> 4); + wed_w32(dev, MTK_WED_ADDR_ELEM_TBL_CFG, + MTK_WED_ADDR_ELEM_TBL_WR | (i & 0x7f)); + + val = wed_r32(dev, MTK_WED_ADDR_ELEM_TBL_CFG); + while (!(val & MTK_WED_ADDR_ELEM_TBL_WR_RDY) && count++ < 100) + val = wed_r32(dev, MTK_WED_ADDR_ELEM_TBL_CFG); + if (count >= 100) + dev_err(dev->hw->dev, + "write ba session base failed\n"); 
+ } + + /* pn check init */ + for (i = 0; i < dev->wlan.ind_cmd.particular_sid; i++) { + wed_w32(dev, MTK_WED_PN_CHECK_WDATA_M, + MTK_WED_PN_CHECK_IS_FIRST); + + wed_w32(dev, MTK_WED_PN_CHECK_CFG, MTK_WED_PN_CHECK_WR | + FIELD_PREP(MTK_WED_PN_CHECK_SE_ID, i)); + + count = 0; + val = wed_r32(dev, MTK_WED_PN_CHECK_CFG); + while (!(val & MTK_WED_PN_CHECK_WR_RDY) && count++ < 100) + val = wed_r32(dev, MTK_WED_PN_CHECK_CFG); + if (count >= 100) + dev_err(dev->hw->dev, + "session(%d) initialization failed\n", i); + } + + wed_w32(dev, MTK_WED_RX_IND_CMD_CNT0, MTK_WED_RX_IND_CMD_DBG_CNT_EN); + wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_IND_CMD_EN); + + return 0; +} + static void mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) { @@ -2216,6 +2507,10 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, .detach = mtk_wed_detach, .ppe_check = mtk_wed_ppe_check, .setup_tc = mtk_wed_setup_tc, + .start_hw_rro = mtk_wed_start_hw_rro, + .rro_rx_ring_setup = mtk_wed_rro_rx_ring_setup, + .msdu_pg_rx_ring_setup = mtk_wed_msdu_pg_rx_ring_setup, + .ind_rx_ring_setup = mtk_wed_ind_rx_ring_setup, }; struct device_node *eth_np = eth->dev->of_node; struct platform_device *pdev; diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 90d9c9ead3bc..a476648858a6 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -10,6 +10,7 @@ #define MTK_WED_TX_QUEUES 2 #define MTK_WED_RX_QUEUES 2 +#define MTK_WED_RX_PAGE_QUEUES 3 #define WED_WO_STA_REC 0x6 @@ -99,6 +100,9 @@ struct mtk_wed_device { struct mtk_wed_ring txfree_ring; struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_rro_ring[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_page_ring[MTK_WED_RX_PAGE_QUEUES]; + struct mtk_wed_ring ind_cmd_ring; struct { int size; @@ -119,6 +123,13 @@ struct mtk_wed_device { dma_addr_t fdbk_phys; } rro; + struct { + int size; + struct 
mtk_wed_buf *pages; + struct mtk_wed_bm_desc *desc; + dma_addr_t desc_phys; + } hw_rro; + /* filled by driver: */ struct { union { @@ -137,6 +148,8 @@ struct mtk_wed_device { u32 wpdma_txfree; u32 wpdma_rx_glo; u32 wpdma_rx; + u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; + u32 wpdma_rx_pg; bool wcid_512; bool hw_rro; @@ -151,9 +164,20 @@ struct mtk_wed_device { u8 tx_tbit[MTK_WED_TX_QUEUES]; u8 rx_tbit[MTK_WED_RX_QUEUES]; + u8 rro_rx_tbit[MTK_WED_RX_QUEUES]; + u8 rx_pg_tbit[MTK_WED_RX_PAGE_QUEUES]; u8 txfree_tbit; u8 amsdu_max_subframes; + struct { + u8 se_group_nums; + u16 win_size; + u16 particular_sid; + u32 ack_sn_addr; + dma_addr_t particular_se_phys; + dma_addr_t addr_elem_phys[1024]; + } ind_cmd; + u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); void (*offload_disable)(struct mtk_wed_device *wed); @@ -192,6 +216,14 @@ struct mtk_wed_ops { void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev, enum tc_setup_type type, void *type_data); + void (*start_hw_rro)(struct mtk_wed_device *dev, u32 irq_mask, + bool reset); + void (*rro_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + void (*msdu_pg_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*ind_rx_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); }; extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; @@ -263,6 +295,15 @@ static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev) #define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \ (_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data) +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) \ + (_dev)->ops->start_hw_rro(_dev, _mask, _reset) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->rro_rx_ring_setup(_dev, _ring, _regs) 
+#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->msdu_pg_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) \ + (_dev)->ops->ind_rx_ring_setup(_dev, _regs) + #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -282,6 +323,10 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_stop(_dev) do {} while (0) #define mtk_wed_device_dma_reset(_dev) do {} while (0) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) do {} while (0) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) -ENODEV #endif #endif -- cgit v1.2.3 From 9ea9cb00a82b53ec39630eac718776d37e41b35a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Sep 2023 11:21:39 -0400 Subject: mm: memcontrol: fix GFP_NOFS recursion in memory.high enforcement Breno and Josef report a deadlock scenario from cgroup reclaim re-entering the filesystem: [ 361.546690] ====================================================== [ 361.559210] WARNING: possible circular locking dependency detected [ 361.571703] 6.5.0-0_fbk700_debug_rc0_kbuilder_13159_gbf787a128001 #1 Tainted: G S E [ 361.589704] ------------------------------------------------------ [ 361.602277] find/9315 is trying to acquire lock: [ 361.611625] ffff88837ba140c0 (&delayed_node->mutex){+.+.}-{4:4}, at: __btrfs_release_delayed_node+0x68/0x4f0 [ 361.631437] [ 361.631437] but task is already holding lock: [ 361.643243] ffff8881765b8678 (btrfs-tree-01){++++}-{4:4}, at: btrfs_tree_read_lock+0x1e/0x40 [ 362.904457] mutex_lock_nested+0x1c/0x30 [ 362.912414] __btrfs_release_delayed_node+0x68/0x4f0 [ 362.922460] btrfs_evict_inode+0x301/0x770 [ 362.982726] evict+0x17c/0x380 [ 362.988944] 
prune_icache_sb+0x100/0x1d0 [ 363.005559] super_cache_scan+0x1f8/0x260 [ 363.013695] do_shrink_slab+0x2a2/0x540 [ 363.021489] shrink_slab_memcg+0x237/0x3d0 [ 363.050606] shrink_slab+0xa7/0x240 [ 363.083382] shrink_node_memcgs+0x262/0x3b0 [ 363.091870] shrink_node+0x1a4/0x720 [ 363.099150] shrink_zones+0x1f6/0x5d0 [ 363.148798] do_try_to_free_pages+0x19b/0x5e0 [ 363.157633] try_to_free_mem_cgroup_pages+0x266/0x370 [ 363.190575] reclaim_high+0x16f/0x1f0 [ 363.208409] mem_cgroup_handle_over_high+0x10b/0x270 [ 363.246678] try_charge_memcg+0xaf2/0xc70 [ 363.304151] charge_memcg+0xf0/0x350 [ 363.320070] __mem_cgroup_charge+0x28/0x40 [ 363.328371] __filemap_add_folio+0x870/0xd50 [ 363.371303] filemap_add_folio+0xdd/0x310 [ 363.399696] __filemap_get_folio+0x2fc/0x7d0 [ 363.419086] pagecache_get_page+0xe/0x30 [ 363.427048] alloc_extent_buffer+0x1cd/0x6a0 [ 363.435704] read_tree_block+0x43/0xc0 [ 363.443316] read_block_for_search+0x361/0x510 [ 363.466690] btrfs_search_slot+0xc8c/0x1520 This is caused by the mem_cgroup_handle_over_high() not respecting the gfp_mask of the allocation context. We used to only call this function on resume to userspace, where no locks were held. But c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") added a call from the allocation context without considering the gfp. 
Link: https://lkml.kernel.org/r/20230914152139.100822-1-hannes@cmpxchg.org Fixes: c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") Signed-off-by: Johannes Weiner Reported-by: Breno Leitao Reported-by: Josef Bacik Acked-by: Shakeel Butt Acked-by: Michal Hocko Cc: Roman Gushchin Cc: Muchun Song Cc: [5.17+] Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 ++-- include/linux/resume_user_mode.h | 2 +- mm/memcontrol.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ab94ad4597d0..e4e24da16d2c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -920,7 +920,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } -void mem_cgroup_handle_over_high(void); +void mem_cgroup_handle_over_high(gfp_t gfp_mask); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); @@ -1458,7 +1458,7 @@ static inline void mem_cgroup_unlock_pages(void) rcu_read_unlock(); } -static inline void mem_cgroup_handle_over_high(void) +static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) { } diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h index 285189454449..f8f3e958e9cf 100644 --- a/include/linux/resume_user_mode.h +++ b/include/linux/resume_user_mode.h @@ -55,7 +55,7 @@ static inline void resume_user_mode_work(struct pt_regs *regs) } #endif - mem_cgroup_handle_over_high(); + mem_cgroup_handle_over_high(GFP_KERNEL); blkcg_maybe_throttle_current(); rseq_handle_notify_resume(NULL, regs); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a4d3282493b6..d13dde2f8b56 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2555,7 +2555,7 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, * Scheduled by try_charge() to be executed from the userland return path * and reclaims memory over the high 
limit. */ -void mem_cgroup_handle_over_high(void) +void mem_cgroup_handle_over_high(gfp_t gfp_mask) { unsigned long penalty_jiffies; unsigned long pflags; @@ -2583,7 +2583,7 @@ retry_reclaim: */ nr_reclaimed = reclaim_high(memcg, in_retry ? SWAP_CLUSTER_MAX : nr_pages, - GFP_KERNEL); + gfp_mask); /* * memory.high is breached and reclaim is unable to keep up. Throttle @@ -2819,7 +2819,7 @@ done_restock: if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH && !(current->flags & PF_MEMALLOC) && gfpflags_allow_blocking(gfp_mask)) { - mem_cgroup_handle_over_high(); + mem_cgroup_handle_over_high(gfp_mask); } return 0; } -- cgit v1.2.3 From b724a6418f1f853bcb39c8923bf14a50c7bdbd07 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Sun, 17 Sep 2023 23:38:46 +0800 Subject: bpf: Fix tr dereferencing Fix 'tr' dereferencing bug when CONFIG_BPF_JIT is turned off. When CONFIG_BPF_JIT is turned off, 'bpf_trampoline_get()' returns NULL, which is same as the cases when CONFIG_BPF_JIT is turned on. Closes: https://lore.kernel.org/r/202309131936.5Nc8eUD0-lkp@intel.com/ Fixes: f7b12b6fea00 ("bpf: verifier: refactor check_attach_btf_id()") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Leon Hwang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230917153846.88732-1-hffilwlqm@gmail.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 024e8b28c34b..49f8b691496c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1307,7 +1307,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { - return ERR_PTR(-EOPNOTSUPP); + return NULL; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) -- cgit v1.2.3 From 
54e1f99d91405417b3ddb6050cfba82733c3aa41 Mon Sep 17 00:00:00 2001 From: Komal Bajaj Date: Wed, 30 Aug 2023 16:26:51 +0530 Subject: nvmem: core: Add stub for nvmem_cell_read_u8 Add the stub nvmem_cell_read_u8() function for drivers running with CONFIG_NVMEM disabled. Signed-off-by: Komal Bajaj Reviewed-by: Mukesh Ojha Link: https://lore.kernel.org/r/20230830105654.28057-4-quic_kbajaj@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/nvmem-consumer.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 4523e4e83319..6ec4b9743e25 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -127,6 +127,12 @@ static inline int nvmem_cell_write(struct nvmem_cell *cell, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_u8(struct device *dev, + const char *cell_id, u8 *val) +{ + return -EOPNOTSUPP; +} + static inline int nvmem_cell_read_u16(struct device *dev, const char *cell_id, u16 *val) { -- cgit v1.2.3 From 0bc76be64e80b15b975345b6957a87a1893c34f2 Mon Sep 17 00:00:00 2001 From: Komal Bajaj Date: Wed, 30 Aug 2023 16:26:53 +0530 Subject: soc: qcom: llcc: Updating the macro name Update macro name for LLCC_DRE to LLCC_ECC as per the latest specification. 
Signed-off-by: Komal Bajaj Reviewed-by: Mukesh Ojha Acked-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230830105654.28057-6-quic_kbajaj@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/llcc-qcom.c | 2 +- include/linux/soc/qcom/llcc-qcom.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c index c31d9e39e864..3bd841e67eba 100644 --- a/drivers/soc/qcom/llcc-qcom.c +++ b/drivers/soc/qcom/llcc-qcom.c @@ -191,7 +191,7 @@ static const struct llcc_slice_config sc8280xp_data[] = { { LLCC_MMUHWT, 13, 1024, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 }, { LLCC_DISP, 16, 6144, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, { LLCC_AUDHW, 22, 2048, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, - { LLCC_DRE, 26, 1024, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, + { LLCC_ECC, 26, 1024, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, { LLCC_CVP, 28, 512, 3, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, { LLCC_APTCM, 30, 1024, 3, 1, 0x0, 0x1, 1, 0, 0, 1, 0, 0 }, { LLCC_WRCACHE, 31, 1024, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 }, diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 93417ba1ead4..1a886666bbb6 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -30,7 +30,7 @@ #define LLCC_NPU 23 #define LLCC_WLHW 24 #define LLCC_PIMEM 25 -#define LLCC_DRE 26 +#define LLCC_ECC 26 #define LLCC_CVP 28 #define LLCC_MODPE 29 #define LLCC_APTCM 30 -- cgit v1.2.3 From b64d143b752932ef483d0ed8d00958f1832dd6bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Sep 2023 16:28:23 +0800 Subject: crypto: hash - Hide CRYPTO_ALG_TYPE_AHASH_MASK Move the macro CRYPTO_ALG_TYPE_AHASH_MASK out of linux/crypto.h and into crypto/ahash.c so that it's not visible to users of the Crypto API. Also remove the unused CRYPTO_ALG_TYPE_HASH_MASK macro. 
Signed-off-by: Herbert Xu --- crypto/ahash.c | 2 ++ include/linux/crypto.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/crypto/ahash.c b/crypto/ahash.c index 709ef0940799..213bb3e9f245 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -21,6 +21,8 @@ #include "hash.h" +#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e + static const struct crypto_type crypto_ahash_type; struct ahash_request_priv { diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 31f6fee0c36c..a0780deb017a 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -35,8 +35,6 @@ #define CRYPTO_ALG_TYPE_SHASH 0x0000000e #define CRYPTO_ALG_TYPE_AHASH 0x0000000f -#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e -#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 -- cgit v1.2.3 From 31865c4c4db2b742fec6ccbff80483fa3e7ab9b9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 14 Sep 2023 16:28:24 +0800 Subject: crypto: skcipher - Add lskcipher Add a new API type lskcipher designed for taking straight kernel pointers instead of SG lists. Its relationship to skcipher will be analogous to that between shash and ahash. 
Signed-off-by: Herbert Xu --- crypto/Makefile | 6 +- crypto/cryptd.c | 2 +- crypto/lskcipher.c | 594 +++++++++++++++++++++++++++++++++++++ crypto/skcipher.c | 75 +++-- crypto/skcipher.h | 30 ++ include/crypto/internal/skcipher.h | 114 ++++++- include/crypto/skcipher.h | 309 ++++++++++++++++++- include/linux/crypto.h | 1 + 8 files changed, 1086 insertions(+), 45 deletions(-) create mode 100644 crypto/lskcipher.c create mode 100644 crypto/skcipher.h (limited to 'include/linux') diff --git a/crypto/Makefile b/crypto/Makefile index 953a7e105e58..5ac6876f935a 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -16,7 +16,11 @@ obj-$(CONFIG_CRYPTO_ALGAPI2) += crypto_algapi.o obj-$(CONFIG_CRYPTO_AEAD2) += aead.o obj-$(CONFIG_CRYPTO_GENIV) += geniv.o -obj-$(CONFIG_CRYPTO_SKCIPHER2) += skcipher.o +crypto_skcipher-y += lskcipher.o +crypto_skcipher-y += skcipher.o + +obj-$(CONFIG_CRYPTO_SKCIPHER2) += crypto_skcipher.o + obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o obj-$(CONFIG_CRYPTO_ECHAINIV) += echainiv.o diff --git a/crypto/cryptd.c b/crypto/cryptd.c index bbcc368b6a55..194a92d677b9 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -929,7 +929,7 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb) return PTR_ERR(algt); switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { - case CRYPTO_ALG_TYPE_SKCIPHER: + case CRYPTO_ALG_TYPE_LSKCIPHER: return cryptd_create_skcipher(tmpl, tb, algt, &queue); case CRYPTO_ALG_TYPE_HASH: return cryptd_create_hash(tmpl, tb, algt, &queue); diff --git a/crypto/lskcipher.c b/crypto/lskcipher.c new file mode 100644 index 000000000000..3343c6d955da --- /dev/null +++ b/crypto/lskcipher.c @@ -0,0 +1,594 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Linear symmetric key cipher operations. + * + * Generic encrypt/decrypt wrapper for ciphers. 
+ * + * Copyright (c) 2023 Herbert Xu + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "skcipher.h" + +static inline struct crypto_lskcipher *__crypto_lskcipher_cast( + struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_lskcipher, base); +} + +static inline struct lskcipher_alg *__crypto_lskcipher_alg( + struct crypto_alg *alg) +{ + return container_of(alg, struct lskcipher_alg, co.base); +} + +static inline struct crypto_istat_cipher *lskcipher_get_stat( + struct lskcipher_alg *alg) +{ + return skcipher_get_stat_common(&alg->co); +} + +static inline int crypto_lskcipher_errstat(struct lskcipher_alg *alg, int err) +{ + struct crypto_istat_cipher *istat = lskcipher_get_stat(alg); + + if (!IS_ENABLED(CONFIG_CRYPTO_STATS)) + return err; + + if (err) + atomic64_inc(&istat->err_cnt); + + return err; +} + +static int lskcipher_setkey_unaligned(struct crypto_lskcipher *tfm, + const u8 *key, unsigned int keylen) +{ + unsigned long alignmask = crypto_lskcipher_alignmask(tfm); + struct lskcipher_alg *cipher = crypto_lskcipher_alg(tfm); + u8 *buffer, *alignbuffer; + unsigned long absize; + int ret; + + absize = keylen + alignmask; + buffer = kmalloc(absize, GFP_ATOMIC); + if (!buffer) + return -ENOMEM; + + alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + memcpy(alignbuffer, key, keylen); + ret = cipher->setkey(tfm, alignbuffer, keylen); + kfree_sensitive(buffer); + return ret; +} + +int crypto_lskcipher_setkey(struct crypto_lskcipher *tfm, const u8 *key, + unsigned int keylen) +{ + unsigned long alignmask = crypto_lskcipher_alignmask(tfm); + struct lskcipher_alg *cipher = crypto_lskcipher_alg(tfm); + + if (keylen < cipher->co.min_keysize || keylen > cipher->co.max_keysize) + return -EINVAL; + + if ((unsigned long)key & alignmask) + return lskcipher_setkey_unaligned(tfm, key, keylen); + else + return cipher->setkey(tfm, key, keylen); +} +EXPORT_SYMBOL_GPL(crypto_lskcipher_setkey); + +static int 
crypto_lskcipher_crypt_unaligned( + struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, + u8 *iv, int (*crypt)(struct crypto_lskcipher *tfm, const u8 *src, + u8 *dst, unsigned len, u8 *iv, bool final)) +{ + unsigned ivsize = crypto_lskcipher_ivsize(tfm); + unsigned bs = crypto_lskcipher_blocksize(tfm); + unsigned cs = crypto_lskcipher_chunksize(tfm); + int err; + u8 *tiv; + u8 *p; + + BUILD_BUG_ON(MAX_CIPHER_BLOCKSIZE > PAGE_SIZE || + MAX_CIPHER_ALIGNMASK >= PAGE_SIZE); + + tiv = kmalloc(PAGE_SIZE, GFP_ATOMIC); + if (!tiv) + return -ENOMEM; + + memcpy(tiv, iv, ivsize); + + p = kmalloc(PAGE_SIZE, GFP_ATOMIC); + err = -ENOMEM; + if (!p) + goto out; + + while (len >= bs) { + unsigned chunk = min((unsigned)PAGE_SIZE, len); + int err; + + if (chunk > cs) + chunk &= ~(cs - 1); + + memcpy(p, src, chunk); + err = crypt(tfm, p, p, chunk, tiv, true); + if (err) + goto out; + + memcpy(dst, p, chunk); + src += chunk; + dst += chunk; + len -= chunk; + } + + err = len ? -EINVAL : 0; + +out: + memcpy(iv, tiv, ivsize); + kfree_sensitive(p); + kfree_sensitive(tiv); + return err; +} + +static int crypto_lskcipher_crypt(struct crypto_lskcipher *tfm, const u8 *src, + u8 *dst, unsigned len, u8 *iv, + int (*crypt)(struct crypto_lskcipher *tfm, + const u8 *src, u8 *dst, + unsigned len, u8 *iv, + bool final)) +{ + unsigned long alignmask = crypto_lskcipher_alignmask(tfm); + struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); + int ret; + + if (((unsigned long)src | (unsigned long)dst | (unsigned long)iv) & + alignmask) { + ret = crypto_lskcipher_crypt_unaligned(tfm, src, dst, len, iv, + crypt); + goto out; + } + + ret = crypt(tfm, src, dst, len, iv, true); + +out: + return crypto_lskcipher_errstat(alg, ret); +} + +int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, + u8 *dst, unsigned len, u8 *iv) +{ + struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); + + if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { + struct crypto_istat_cipher *istat = 
lskcipher_get_stat(alg); + + atomic64_inc(&istat->encrypt_cnt); + atomic64_add(len, &istat->encrypt_tlen); + } + + return crypto_lskcipher_crypt(tfm, src, dst, len, iv, alg->encrypt); +} +EXPORT_SYMBOL_GPL(crypto_lskcipher_encrypt); + +int crypto_lskcipher_decrypt(struct crypto_lskcipher *tfm, const u8 *src, + u8 *dst, unsigned len, u8 *iv) +{ + struct lskcipher_alg *alg = crypto_lskcipher_alg(tfm); + + if (IS_ENABLED(CONFIG_CRYPTO_STATS)) { + struct crypto_istat_cipher *istat = lskcipher_get_stat(alg); + + atomic64_inc(&istat->decrypt_cnt); + atomic64_add(len, &istat->decrypt_tlen); + } + + return crypto_lskcipher_crypt(tfm, src, dst, len, iv, alg->decrypt); +} +EXPORT_SYMBOL_GPL(crypto_lskcipher_decrypt); + +int crypto_lskcipher_setkey_sg(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct crypto_lskcipher **ctx = crypto_skcipher_ctx(tfm); + + return crypto_lskcipher_setkey(*ctx, key, keylen); +} + +static int crypto_lskcipher_crypt_sg(struct skcipher_request *req, + int (*crypt)(struct crypto_lskcipher *tfm, + const u8 *src, u8 *dst, + unsigned len, u8 *iv, + bool final)) +{ + struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); + struct crypto_lskcipher **ctx = crypto_skcipher_ctx(skcipher); + struct crypto_lskcipher *tfm = *ctx; + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while (walk.nbytes) { + err = crypt(tfm, walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, walk.iv, walk.nbytes == walk.total); + err = skcipher_walk_done(&walk, err); + } + + return err; +} + +int crypto_lskcipher_encrypt_sg(struct skcipher_request *req) +{ + struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); + struct crypto_lskcipher **ctx = crypto_skcipher_ctx(skcipher); + struct lskcipher_alg *alg = crypto_lskcipher_alg(*ctx); + + return crypto_lskcipher_crypt_sg(req, alg->encrypt); +} + +int crypto_lskcipher_decrypt_sg(struct skcipher_request *req) +{ + struct crypto_skcipher 
*skcipher = crypto_skcipher_reqtfm(req); + struct crypto_lskcipher **ctx = crypto_skcipher_ctx(skcipher); + struct lskcipher_alg *alg = crypto_lskcipher_alg(*ctx); + + return crypto_lskcipher_crypt_sg(req, alg->decrypt); +} + +static void crypto_lskcipher_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_lskcipher *skcipher = __crypto_lskcipher_cast(tfm); + struct lskcipher_alg *alg = crypto_lskcipher_alg(skcipher); + + alg->exit(skcipher); +} + +static int crypto_lskcipher_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_lskcipher *skcipher = __crypto_lskcipher_cast(tfm); + struct lskcipher_alg *alg = crypto_lskcipher_alg(skcipher); + + if (alg->exit) + skcipher->base.exit = crypto_lskcipher_exit_tfm; + + if (alg->init) + return alg->init(skcipher); + + return 0; +} + +static void crypto_lskcipher_free_instance(struct crypto_instance *inst) +{ + struct lskcipher_instance *skcipher = + container_of(inst, struct lskcipher_instance, s.base); + + skcipher->free(skcipher); +} + +static void __maybe_unused crypto_lskcipher_show( + struct seq_file *m, struct crypto_alg *alg) +{ + struct lskcipher_alg *skcipher = __crypto_lskcipher_alg(alg); + + seq_printf(m, "type : lskcipher\n"); + seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); + seq_printf(m, "min keysize : %u\n", skcipher->co.min_keysize); + seq_printf(m, "max keysize : %u\n", skcipher->co.max_keysize); + seq_printf(m, "ivsize : %u\n", skcipher->co.ivsize); + seq_printf(m, "chunksize : %u\n", skcipher->co.chunksize); +} + +static int __maybe_unused crypto_lskcipher_report( + struct sk_buff *skb, struct crypto_alg *alg) +{ + struct lskcipher_alg *skcipher = __crypto_lskcipher_alg(alg); + struct crypto_report_blkcipher rblkcipher; + + memset(&rblkcipher, 0, sizeof(rblkcipher)); + + strscpy(rblkcipher.type, "lskcipher", sizeof(rblkcipher.type)); + strscpy(rblkcipher.geniv, "", sizeof(rblkcipher.geniv)); + + rblkcipher.blocksize = alg->cra_blocksize; + rblkcipher.min_keysize = skcipher->co.min_keysize; + 
rblkcipher.max_keysize = skcipher->co.max_keysize; + rblkcipher.ivsize = skcipher->co.ivsize; + + return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER, + sizeof(rblkcipher), &rblkcipher); +} + +static int __maybe_unused crypto_lskcipher_report_stat( + struct sk_buff *skb, struct crypto_alg *alg) +{ + struct lskcipher_alg *skcipher = __crypto_lskcipher_alg(alg); + struct crypto_istat_cipher *istat; + struct crypto_stat_cipher rcipher; + + istat = lskcipher_get_stat(skcipher); + + memset(&rcipher, 0, sizeof(rcipher)); + + strscpy(rcipher.type, "cipher", sizeof(rcipher.type)); + + rcipher.stat_encrypt_cnt = atomic64_read(&istat->encrypt_cnt); + rcipher.stat_encrypt_tlen = atomic64_read(&istat->encrypt_tlen); + rcipher.stat_decrypt_cnt = atomic64_read(&istat->decrypt_cnt); + rcipher.stat_decrypt_tlen = atomic64_read(&istat->decrypt_tlen); + rcipher.stat_err_cnt = atomic64_read(&istat->err_cnt); + + return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher); +} + +static const struct crypto_type crypto_lskcipher_type = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_lskcipher_init_tfm, + .free = crypto_lskcipher_free_instance, +#ifdef CONFIG_PROC_FS + .show = crypto_lskcipher_show, +#endif +#if IS_ENABLED(CONFIG_CRYPTO_USER) + .report = crypto_lskcipher_report, +#endif +#ifdef CONFIG_CRYPTO_STATS + .report_stat = crypto_lskcipher_report_stat, +#endif + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_MASK, + .type = CRYPTO_ALG_TYPE_LSKCIPHER, + .tfmsize = offsetof(struct crypto_lskcipher, base), +}; + +static void crypto_lskcipher_exit_tfm_sg(struct crypto_tfm *tfm) +{ + struct crypto_lskcipher **ctx = crypto_tfm_ctx(tfm); + + crypto_free_lskcipher(*ctx); +} + +int crypto_init_lskcipher_ops_sg(struct crypto_tfm *tfm) +{ + struct crypto_lskcipher **ctx = crypto_tfm_ctx(tfm); + struct crypto_alg *calg = tfm->__crt_alg; + struct crypto_lskcipher *skcipher; + + if (!crypto_mod_get(calg)) + return -EAGAIN; + + skcipher = crypto_create_tfm(calg, 
&crypto_lskcipher_type); + if (IS_ERR(skcipher)) { + crypto_mod_put(calg); + return PTR_ERR(skcipher); + } + + *ctx = skcipher; + tfm->exit = crypto_lskcipher_exit_tfm_sg; + + return 0; +} + +int crypto_grab_lskcipher(struct crypto_lskcipher_spawn *spawn, + struct crypto_instance *inst, + const char *name, u32 type, u32 mask) +{ + spawn->base.frontend = &crypto_lskcipher_type; + return crypto_grab_spawn(&spawn->base, inst, name, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_grab_lskcipher); + +struct crypto_lskcipher *crypto_alloc_lskcipher(const char *alg_name, + u32 type, u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_lskcipher_type, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_alloc_lskcipher); + +static int lskcipher_prepare_alg(struct lskcipher_alg *alg) +{ + struct crypto_alg *base = &alg->co.base; + int err; + + err = skcipher_prepare_alg_common(&alg->co); + if (err) + return err; + + if (alg->co.chunksize & (alg->co.chunksize - 1)) + return -EINVAL; + + base->cra_type = &crypto_lskcipher_type; + base->cra_flags |= CRYPTO_ALG_TYPE_LSKCIPHER; + + return 0; +} + +int crypto_register_lskcipher(struct lskcipher_alg *alg) +{ + struct crypto_alg *base = &alg->co.base; + int err; + + err = lskcipher_prepare_alg(alg); + if (err) + return err; + + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_lskcipher); + +void crypto_unregister_lskcipher(struct lskcipher_alg *alg) +{ + crypto_unregister_alg(&alg->co.base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_lskcipher); + +int crypto_register_lskciphers(struct lskcipher_alg *algs, int count) +{ + int i, ret; + + for (i = 0; i < count; i++) { + ret = crypto_register_lskcipher(&algs[i]); + if (ret) + goto err; + } + + return 0; + +err: + for (--i; i >= 0; --i) + crypto_unregister_lskcipher(&algs[i]); + + return ret; +} +EXPORT_SYMBOL_GPL(crypto_register_lskciphers); + +void crypto_unregister_lskciphers(struct lskcipher_alg *algs, int count) +{ + int i; + + for (i = count - 1; i >= 0; --i) + 
crypto_unregister_lskcipher(&algs[i]); +} +EXPORT_SYMBOL_GPL(crypto_unregister_lskciphers); + +int lskcipher_register_instance(struct crypto_template *tmpl, + struct lskcipher_instance *inst) +{ + int err; + + if (WARN_ON(!inst->free)) + return -EINVAL; + + err = lskcipher_prepare_alg(&inst->alg); + if (err) + return err; + + return crypto_register_instance(tmpl, lskcipher_crypto_instance(inst)); +} +EXPORT_SYMBOL_GPL(lskcipher_register_instance); + +static int lskcipher_setkey_simple(struct crypto_lskcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct crypto_lskcipher *cipher = lskcipher_cipher_simple(tfm); + + crypto_lskcipher_clear_flags(cipher, CRYPTO_TFM_REQ_MASK); + crypto_lskcipher_set_flags(cipher, crypto_lskcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + return crypto_lskcipher_setkey(cipher, key, keylen); +} + +static int lskcipher_init_tfm_simple(struct crypto_lskcipher *tfm) +{ + struct lskcipher_instance *inst = lskcipher_alg_instance(tfm); + struct crypto_lskcipher **ctx = crypto_lskcipher_ctx(tfm); + struct crypto_lskcipher_spawn *spawn; + struct crypto_lskcipher *cipher; + + spawn = lskcipher_instance_ctx(inst); + cipher = crypto_spawn_lskcipher(spawn); + if (IS_ERR(cipher)) + return PTR_ERR(cipher); + + *ctx = cipher; + return 0; +} + +static void lskcipher_exit_tfm_simple(struct crypto_lskcipher *tfm) +{ + struct crypto_lskcipher **ctx = crypto_lskcipher_ctx(tfm); + + crypto_free_lskcipher(*ctx); +} + +static void lskcipher_free_instance_simple(struct lskcipher_instance *inst) +{ + crypto_drop_lskcipher(lskcipher_instance_ctx(inst)); + kfree(inst); +} + +/** + * lskcipher_alloc_instance_simple - allocate instance of simple block cipher + * + * Allocate an lskcipher_instance for a simple block cipher mode of operation, + * e.g. cbc or ecb. The instance context will have just a single crypto_spawn, + * that for the underlying cipher. 
The {min,max}_keysize, ivsize, blocksize, + * alignmask, and priority are set from the underlying cipher but can be + * overridden if needed. The tfm context defaults to + * struct crypto_lskcipher *, and default ->setkey(), ->init(), and + * ->exit() methods are installed. + * + * @tmpl: the template being instantiated + * @tb: the template parameters + * + * Return: a pointer to the new instance, or an ERR_PTR(). The caller still + * needs to register the instance. + */ +struct lskcipher_instance *lskcipher_alloc_instance_simple( + struct crypto_template *tmpl, struct rtattr **tb) +{ + u32 mask; + struct lskcipher_instance *inst; + struct crypto_lskcipher_spawn *spawn; + struct lskcipher_alg *cipher_alg; + int err; + + err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_LSKCIPHER, &mask); + if (err) + return ERR_PTR(err); + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return ERR_PTR(-ENOMEM); + + spawn = lskcipher_instance_ctx(inst); + err = crypto_grab_lskcipher(spawn, + lskcipher_crypto_instance(inst), + crypto_attr_alg_name(tb[1]), 0, mask); + if (err) + goto err_free_inst; + cipher_alg = crypto_lskcipher_spawn_alg(spawn); + + err = crypto_inst_setname(lskcipher_crypto_instance(inst), tmpl->name, + &cipher_alg->co.base); + if (err) + goto err_free_inst; + + /* Don't allow nesting. 
*/ + err = -ELOOP; + if ((cipher_alg->co.base.cra_flags & CRYPTO_ALG_INSTANCE)) + goto err_free_inst; + + err = -EINVAL; + if (cipher_alg->co.ivsize) + goto err_free_inst; + + inst->free = lskcipher_free_instance_simple; + + /* Default algorithm properties, can be overridden */ + inst->alg.co.base.cra_blocksize = cipher_alg->co.base.cra_blocksize; + inst->alg.co.base.cra_alignmask = cipher_alg->co.base.cra_alignmask; + inst->alg.co.base.cra_priority = cipher_alg->co.base.cra_priority; + inst->alg.co.min_keysize = cipher_alg->co.min_keysize; + inst->alg.co.max_keysize = cipher_alg->co.max_keysize; + inst->alg.co.ivsize = cipher_alg->co.base.cra_blocksize; + + /* Use struct crypto_lskcipher * by default, can be overridden */ + inst->alg.co.base.cra_ctxsize = sizeof(struct crypto_lskcipher *); + inst->alg.setkey = lskcipher_setkey_simple; + inst->alg.init = lskcipher_init_tfm_simple; + inst->alg.exit = lskcipher_exit_tfm_simple; + + return inst; + +err_free_inst: + lskcipher_free_instance_simple(inst); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(lskcipher_alloc_instance_simple); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 7b275716cf4e..b9496dc8a609 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -24,8 +24,9 @@ #include #include #include +#include "skcipher.h" -#include "internal.h" +#define CRYPTO_ALG_TYPE_SKCIPHER_MASK 0x0000000e enum { SKCIPHER_WALK_PHYS = 1 << 0, @@ -43,6 +44,8 @@ struct skcipher_walk_buffer { u8 buffer[]; }; +static const struct crypto_type crypto_skcipher_type; + static int skcipher_walk_next(struct skcipher_walk *walk); static inline void skcipher_map_src(struct skcipher_walk *walk) @@ -89,11 +92,7 @@ static inline struct skcipher_alg *__crypto_skcipher_alg( static inline struct crypto_istat_cipher *skcipher_get_stat( struct skcipher_alg *alg) { -#ifdef CONFIG_CRYPTO_STATS - return &alg->stat; -#else - return NULL; -#endif + return skcipher_get_stat_common(&alg->co); } static inline int crypto_skcipher_errstat(struct 
skcipher_alg *alg, int err) @@ -468,6 +467,7 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); walk->total = req->cryptlen; walk->nbytes = 0; @@ -485,10 +485,14 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, SKCIPHER_WALK_SLEEP : 0; walk->blocksize = crypto_skcipher_blocksize(tfm); - walk->stride = crypto_skcipher_walksize(tfm); walk->ivsize = crypto_skcipher_ivsize(tfm); walk->alignmask = crypto_skcipher_alignmask(tfm); + if (alg->co.base.cra_type != &crypto_skcipher_type) + walk->stride = alg->co.chunksize; + else + walk->stride = alg->walksize; + return skcipher_walk_first(walk); } @@ -616,6 +620,11 @@ int crypto_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned long alignmask = crypto_skcipher_alignmask(tfm); int err; + if (cipher->co.base.cra_type != &crypto_skcipher_type) { + err = crypto_lskcipher_setkey_sg(tfm, key, keylen); + goto out; + } + if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) return -EINVAL; @@ -624,6 +633,7 @@ int crypto_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, else err = cipher->setkey(tfm, key, keylen); +out: if (unlikely(err)) { skcipher_set_needkey(tfm); return err; @@ -649,6 +659,8 @@ int crypto_skcipher_encrypt(struct skcipher_request *req) if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; + else if (alg->co.base.cra_type != &crypto_skcipher_type) + ret = crypto_lskcipher_encrypt_sg(req); else ret = alg->encrypt(req); @@ -671,6 +683,8 @@ int crypto_skcipher_decrypt(struct skcipher_request *req) if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) ret = -ENOKEY; + else if (alg->co.base.cra_type != &crypto_skcipher_type) + ret = crypto_lskcipher_decrypt_sg(req); else ret = alg->decrypt(req); @@ -693,6 +707,9 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) 
skcipher_set_needkey(skcipher); + if (tfm->__crt_alg->cra_type != &crypto_skcipher_type) + return crypto_init_lskcipher_ops_sg(tfm); + if (alg->exit) skcipher->base.exit = crypto_skcipher_exit_tfm; @@ -702,6 +719,14 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm) return 0; } +static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg) +{ + if (alg->cra_type != &crypto_skcipher_type) + return sizeof(struct crypto_lskcipher *); + + return crypto_alg_extsize(alg); +} + static void crypto_skcipher_free_instance(struct crypto_instance *inst) { struct skcipher_instance *skcipher = @@ -770,7 +795,7 @@ static int __maybe_unused crypto_skcipher_report_stat( } static const struct crypto_type crypto_skcipher_type = { - .extsize = crypto_alg_extsize, + .extsize = crypto_skcipher_extsize, .init_tfm = crypto_skcipher_init_tfm, .free = crypto_skcipher_free_instance, #ifdef CONFIG_PROC_FS @@ -783,7 +808,7 @@ static const struct crypto_type crypto_skcipher_type = { .report_stat = crypto_skcipher_report_stat, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, - .maskset = CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_SKCIPHER_MASK, .type = CRYPTO_ALG_TYPE_SKCIPHER, .tfmsize = offsetof(struct crypto_skcipher, base), }; @@ -834,23 +859,18 @@ int crypto_has_skcipher(const char *alg_name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_has_skcipher); -static int skcipher_prepare_alg(struct skcipher_alg *alg) +int skcipher_prepare_alg_common(struct skcipher_alg_common *alg) { - struct crypto_istat_cipher *istat = skcipher_get_stat(alg); + struct crypto_istat_cipher *istat = skcipher_get_stat_common(alg); struct crypto_alg *base = &alg->base; - if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 || - alg->walksize > PAGE_SIZE / 8) + if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8) return -EINVAL; if (!alg->chunksize) alg->chunksize = base->cra_blocksize; - if (!alg->walksize) - alg->walksize = alg->chunksize; - base->cra_type = 
&crypto_skcipher_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; - base->cra_flags |= CRYPTO_ALG_TYPE_SKCIPHER; if (IS_ENABLED(CONFIG_CRYPTO_STATS)) memset(istat, 0, sizeof(*istat)); @@ -858,6 +878,27 @@ static int skcipher_prepare_alg(struct skcipher_alg *alg) return 0; } +static int skcipher_prepare_alg(struct skcipher_alg *alg) +{ + struct crypto_alg *base = &alg->base; + int err; + + err = skcipher_prepare_alg_common(&alg->co); + if (err) + return err; + + if (alg->walksize > PAGE_SIZE / 8) + return -EINVAL; + + if (!alg->walksize) + alg->walksize = alg->chunksize; + + base->cra_type = &crypto_skcipher_type; + base->cra_flags |= CRYPTO_ALG_TYPE_SKCIPHER; + + return 0; +} + int crypto_register_skcipher(struct skcipher_alg *alg) { struct crypto_alg *base = &alg->base; diff --git a/crypto/skcipher.h b/crypto/skcipher.h new file mode 100644 index 000000000000..6f1295f0fef2 --- /dev/null +++ b/crypto/skcipher.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Cryptographic API. 
+ * + * Copyright (c) 2023 Herbert Xu + */ +#ifndef _LOCAL_CRYPTO_SKCIPHER_H +#define _LOCAL_CRYPTO_SKCIPHER_H + +#include +#include "internal.h" + +static inline struct crypto_istat_cipher *skcipher_get_stat_common( + struct skcipher_alg_common *alg) +{ +#ifdef CONFIG_CRYPTO_STATS + return &alg->stat; +#else + return NULL; +#endif +} + +int crypto_lskcipher_setkey_sg(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen); +int crypto_lskcipher_encrypt_sg(struct skcipher_request *req); +int crypto_lskcipher_decrypt_sg(struct skcipher_request *req); +int crypto_init_lskcipher_ops_sg(struct crypto_tfm *tfm); +int skcipher_prepare_alg_common(struct skcipher_alg_common *alg); + +#endif /* _LOCAL_CRYPTO_SKCIPHER_H */ diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index fb3d9e899f52..4382fd707b8a 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -36,10 +36,25 @@ struct skcipher_instance { }; }; +struct lskcipher_instance { + void (*free)(struct lskcipher_instance *inst); + union { + struct { + char head[offsetof(struct lskcipher_alg, co.base)]; + struct crypto_instance base; + } s; + struct lskcipher_alg alg; + }; +}; + struct crypto_skcipher_spawn { struct crypto_spawn base; }; +struct crypto_lskcipher_spawn { + struct crypto_spawn base; +}; + struct skcipher_walk { union { struct { @@ -80,6 +95,12 @@ static inline struct crypto_instance *skcipher_crypto_instance( return &inst->s.base; } +static inline struct crypto_instance *lskcipher_crypto_instance( + struct lskcipher_instance *inst) +{ + return &inst->s.base; +} + static inline struct skcipher_instance *skcipher_alg_instance( struct crypto_skcipher *skcipher) { @@ -87,11 +108,23 @@ static inline struct skcipher_instance *skcipher_alg_instance( struct skcipher_instance, alg); } +static inline struct lskcipher_instance *lskcipher_alg_instance( + struct crypto_lskcipher *lskcipher) +{ + return 
container_of(crypto_lskcipher_alg(lskcipher), + struct lskcipher_instance, alg); +} + static inline void *skcipher_instance_ctx(struct skcipher_instance *inst) { return crypto_instance_ctx(skcipher_crypto_instance(inst)); } +static inline void *lskcipher_instance_ctx(struct lskcipher_instance *inst) +{ + return crypto_instance_ctx(lskcipher_crypto_instance(inst)); +} + static inline void skcipher_request_complete(struct skcipher_request *req, int err) { crypto_request_complete(&req->base, err); @@ -101,29 +134,56 @@ int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); +int crypto_grab_lskcipher(struct crypto_lskcipher_spawn *spawn, + struct crypto_instance *inst, + const char *name, u32 type, u32 mask); + static inline void crypto_drop_skcipher(struct crypto_skcipher_spawn *spawn) { crypto_drop_spawn(&spawn->base); } +static inline void crypto_drop_lskcipher(struct crypto_lskcipher_spawn *spawn) +{ + crypto_drop_spawn(&spawn->base); +} + static inline struct skcipher_alg *crypto_skcipher_spawn_alg( struct crypto_skcipher_spawn *spawn) { return container_of(spawn->base.alg, struct skcipher_alg, base); } +static inline struct lskcipher_alg *crypto_lskcipher_spawn_alg( + struct crypto_lskcipher_spawn *spawn) +{ + return container_of(spawn->base.alg, struct lskcipher_alg, co.base); +} + static inline struct skcipher_alg *crypto_spawn_skcipher_alg( struct crypto_skcipher_spawn *spawn) { return crypto_skcipher_spawn_alg(spawn); } +static inline struct lskcipher_alg *crypto_spawn_lskcipher_alg( + struct crypto_lskcipher_spawn *spawn) +{ + return crypto_lskcipher_spawn_alg(spawn); +} + static inline struct crypto_skcipher *crypto_spawn_skcipher( struct crypto_skcipher_spawn *spawn) { return crypto_spawn_tfm2(&spawn->base); } +static inline struct crypto_lskcipher *crypto_spawn_lskcipher( + struct crypto_lskcipher_spawn *spawn) +{ + return crypto_spawn_tfm2(&spawn->base); +} + static inline void 
crypto_skcipher_set_reqsize( struct crypto_skcipher *skcipher, unsigned int reqsize) { @@ -144,6 +204,13 @@ void crypto_unregister_skciphers(struct skcipher_alg *algs, int count); int skcipher_register_instance(struct crypto_template *tmpl, struct skcipher_instance *inst); +int crypto_register_lskcipher(struct lskcipher_alg *alg); +void crypto_unregister_lskcipher(struct lskcipher_alg *alg); +int crypto_register_lskciphers(struct lskcipher_alg *algs, int count); +void crypto_unregister_lskciphers(struct lskcipher_alg *algs, int count); +int lskcipher_register_instance(struct crypto_template *tmpl, + struct lskcipher_instance *inst); + int skcipher_walk_done(struct skcipher_walk *walk, int err); int skcipher_walk_virt(struct skcipher_walk *walk, struct skcipher_request *req, @@ -166,6 +233,11 @@ static inline void *crypto_skcipher_ctx(struct crypto_skcipher *tfm) return crypto_tfm_ctx(&tfm->base); } +static inline void *crypto_lskcipher_ctx(struct crypto_lskcipher *tfm) +{ + return crypto_tfm_ctx(&tfm->base); +} + static inline void *crypto_skcipher_ctx_dma(struct crypto_skcipher *tfm) { return crypto_tfm_ctx_dma(&tfm->base); @@ -209,21 +281,16 @@ static inline unsigned int crypto_skcipher_alg_walksize( return alg->walksize; } -/** - * crypto_skcipher_walksize() - obtain walk size - * @tfm: cipher handle - * - * In some cases, algorithms can only perform optimally when operating on - * multiple blocks in parallel. 
This is reflected by the walksize, which - * must be a multiple of the chunksize (or equal if the concern does not - * apply) - * - * Return: walk size in bytes - */ -static inline unsigned int crypto_skcipher_walksize( - struct crypto_skcipher *tfm) +static inline unsigned int crypto_lskcipher_alg_min_keysize( + struct lskcipher_alg *alg) +{ + return alg->co.min_keysize; +} + +static inline unsigned int crypto_lskcipher_alg_max_keysize( + struct lskcipher_alg *alg) { - return crypto_skcipher_alg_walksize(crypto_skcipher_alg(tfm)); + return alg->co.max_keysize; } /* Helpers for simple block cipher modes of operation */ @@ -249,5 +316,24 @@ static inline struct crypto_alg *skcipher_ialg_simple( return crypto_spawn_cipher_alg(spawn); } +static inline struct crypto_lskcipher *lskcipher_cipher_simple( + struct crypto_lskcipher *tfm) +{ + struct crypto_lskcipher **ctx = crypto_lskcipher_ctx(tfm); + + return *ctx; +} + +struct lskcipher_instance *lskcipher_alloc_instance_simple( + struct crypto_template *tmpl, struct rtattr **tb); + +static inline struct lskcipher_alg *lskcipher_ialg_simple( + struct lskcipher_instance *inst) +{ + struct crypto_lskcipher_spawn *spawn = lskcipher_instance_ctx(inst); + + return crypto_lskcipher_spawn_alg(spawn); +} + #endif /* _CRYPTO_INTERNAL_SKCIPHER_H */ diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index 080d1ba3611d..a648ef5ce897 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -49,6 +49,10 @@ struct crypto_sync_skcipher { struct crypto_skcipher base; }; +struct crypto_lskcipher { + struct crypto_tfm base; +}; + /* * struct crypto_istat_cipher - statistics for cipher algorithm * @encrypt_cnt: number of encrypt requests @@ -65,6 +69,43 @@ struct crypto_istat_cipher { atomic64_t err_cnt; }; +#ifdef CONFIG_CRYPTO_STATS +#define SKCIPHER_ALG_COMMON_STAT struct crypto_istat_cipher stat; +#else +#define SKCIPHER_ALG_COMMON_STAT +#endif + +/* + * struct skcipher_alg_common - common properties of 
skcipher_alg + * @min_keysize: Minimum key size supported by the transformation. This is the + * smallest key length supported by this transformation algorithm. + * This must be set to one of the pre-defined values as this is + * not hardware specific. Possible values for this field can be + * found via git grep "_MIN_KEY_SIZE" include/crypto/ + * @max_keysize: Maximum key size supported by the transformation. This is the + * largest key length supported by this transformation algorithm. + * This must be set to one of the pre-defined values as this is + * not hardware specific. Possible values for this field can be + * found via git grep "_MAX_KEY_SIZE" include/crypto/ + * @ivsize: IV size applicable for transformation. The consumer must provide an + * IV of exactly that size to perform the encrypt or decrypt operation. + * @chunksize: Equal to the block size except for stream ciphers such as + * CTR where it is set to the underlying block size. + * @stat: Statistics for cipher algorithm + * @base: Definition of a generic crypto algorithm. + */ +#define SKCIPHER_ALG_COMMON { \ + unsigned int min_keysize; \ + unsigned int max_keysize; \ + unsigned int ivsize; \ + unsigned int chunksize; \ + \ + SKCIPHER_ALG_COMMON_STAT \ + \ + struct crypto_alg base; \ +} +struct skcipher_alg_common SKCIPHER_ALG_COMMON; + /** * struct skcipher_alg - symmetric key cipher definition * @min_keysize: Minimum key size supported by the transformation. This is the @@ -120,6 +161,7 @@ struct crypto_istat_cipher { * in parallel. Should be a multiple of chunksize. * @stat: Statistics for cipher algorithm * @base: Definition of a generic crypto algorithm. + * @co: see struct skcipher_alg_common * * All fields except @ivsize are mandatory and must be filled. 
*/ @@ -131,17 +173,55 @@ struct skcipher_alg { int (*init)(struct crypto_skcipher *tfm); void (*exit)(struct crypto_skcipher *tfm); - unsigned int min_keysize; - unsigned int max_keysize; - unsigned int ivsize; - unsigned int chunksize; unsigned int walksize; -#ifdef CONFIG_CRYPTO_STATS - struct crypto_istat_cipher stat; -#endif + union { + struct SKCIPHER_ALG_COMMON; + struct skcipher_alg_common co; + }; +}; - struct crypto_alg base; +/** + * struct lskcipher_alg - linear symmetric key cipher definition + * @setkey: Set key for the transformation. This function is used to either + * program a supplied key into the hardware or store the key in the + * transformation context for programming it later. Note that this + * function does modify the transformation context. This function can + * be called multiple times during the existence of the transformation + * object, so one must make sure the key is properly reprogrammed into + * the hardware. This function is also responsible for checking the key + * length for validity. In case a software fallback was put in place in + * the @cra_init call, this function might need to use the fallback if + * the algorithm doesn't support all of the key sizes. + * @encrypt: Encrypt a number of bytes. This function is used to encrypt + * the supplied data. This function shall not modify + * the transformation context, as this function may be called + * in parallel with the same transformation object. Data + * may be left over if length is not a multiple of blocks + * and there is more to come (final == false). The number of + * left-over bytes should be returned in case of success. + * @decrypt: Decrypt a number of bytes. This is a reverse counterpart to + * @encrypt and the conditions are exactly the same. + * @init: Initialize the cryptographic transformation object. This function + * is used to initialize the cryptographic transformation object. 
+ * This function is called only once at the instantiation time, right + * after the transformation context was allocated. + * @exit: Deinitialize the cryptographic transformation object. This is a + * counterpart to @init, used to remove various changes set in + * @init. + * @co: see struct skcipher_alg_common + */ +struct lskcipher_alg { + int (*setkey)(struct crypto_lskcipher *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct crypto_lskcipher *tfm, const u8 *src, + u8 *dst, unsigned len, u8 *iv, bool final); + int (*decrypt)(struct crypto_lskcipher *tfm, const u8 *src, + u8 *dst, unsigned len, u8 *iv, bool final); + int (*init)(struct crypto_lskcipher *tfm); + void (*exit)(struct crypto_lskcipher *tfm); + + struct skcipher_alg_common co; }; #define MAX_SYNC_SKCIPHER_REQSIZE 384 @@ -213,12 +293,36 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask); + +/** + * crypto_alloc_lskcipher() - allocate linear symmetric key cipher handle + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * lskcipher + * @type: specifies the type of the cipher + * @mask: specifies the mask for the cipher + * + * Allocate a cipher handle for an lskcipher. The returned struct + * crypto_lskcipher is the cipher handle that is required for any subsequent + * API invocation for that lskcipher. + * + * Return: allocated cipher handle in case of success; IS_ERR() is true in case + * of an error, PTR_ERR() returns the error code. 
+ */ +struct crypto_lskcipher *crypto_alloc_lskcipher(const char *alg_name, + u32 type, u32 mask); + static inline struct crypto_tfm *crypto_skcipher_tfm( struct crypto_skcipher *tfm) { return &tfm->base; } +static inline struct crypto_tfm *crypto_lskcipher_tfm( + struct crypto_lskcipher *tfm) +{ + return &tfm->base; +} + /** * crypto_free_skcipher() - zeroize and free cipher handle * @tfm: cipher handle to be freed @@ -235,6 +339,17 @@ static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm) crypto_free_skcipher(&tfm->base); } +/** + * crypto_free_lskcipher() - zeroize and free cipher handle + * @tfm: cipher handle to be freed + * + * If @tfm is a NULL or error pointer, this function does nothing. + */ +static inline void crypto_free_lskcipher(struct crypto_lskcipher *tfm) +{ + crypto_destroy_tfm(tfm, crypto_lskcipher_tfm(tfm)); +} + /** * crypto_has_skcipher() - Search for the availability of an skcipher. * @alg_name: is the cra_name / name or cra_driver_name / driver name of the @@ -253,6 +368,19 @@ static inline const char *crypto_skcipher_driver_name( return crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)); } +static inline const char *crypto_lskcipher_driver_name( + struct crypto_lskcipher *tfm) +{ + return crypto_tfm_alg_driver_name(crypto_lskcipher_tfm(tfm)); +} + +static inline struct skcipher_alg_common *crypto_skcipher_alg_common( + struct crypto_skcipher *tfm) +{ + return container_of(crypto_skcipher_tfm(tfm)->__crt_alg, + struct skcipher_alg_common, base); +} + static inline struct skcipher_alg *crypto_skcipher_alg( struct crypto_skcipher *tfm) { @@ -260,11 +388,24 @@ static inline struct skcipher_alg *crypto_skcipher_alg( struct skcipher_alg, base); } +static inline struct lskcipher_alg *crypto_lskcipher_alg( + struct crypto_lskcipher *tfm) +{ + return container_of(crypto_lskcipher_tfm(tfm)->__crt_alg, + struct lskcipher_alg, co.base); +} + static inline unsigned int crypto_skcipher_alg_ivsize(struct skcipher_alg *alg) { 
return alg->ivsize; } +static inline unsigned int crypto_lskcipher_alg_ivsize( + struct lskcipher_alg *alg) +{ + return alg->co.ivsize; +} + /** * crypto_skcipher_ivsize() - obtain IV size * @tfm: cipher handle @@ -276,7 +417,7 @@ static inline unsigned int crypto_skcipher_alg_ivsize(struct skcipher_alg *alg) */ static inline unsigned int crypto_skcipher_ivsize(struct crypto_skcipher *tfm) { - return crypto_skcipher_alg(tfm)->ivsize; + return crypto_skcipher_alg_common(tfm)->ivsize; } static inline unsigned int crypto_sync_skcipher_ivsize( @@ -285,6 +426,21 @@ static inline unsigned int crypto_sync_skcipher_ivsize( return crypto_skcipher_ivsize(&tfm->base); } +/** + * crypto_lskcipher_ivsize() - obtain IV size + * @tfm: cipher handle + * + * The size of the IV for the lskcipher referenced by the cipher handle is + * returned. This IV size may be zero if the cipher does not need an IV. + * + * Return: IV size in bytes + */ +static inline unsigned int crypto_lskcipher_ivsize( + struct crypto_lskcipher *tfm) +{ + return crypto_lskcipher_alg(tfm)->co.ivsize; +} + /** * crypto_skcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle @@ -301,12 +457,34 @@ static inline unsigned int crypto_skcipher_blocksize( return crypto_tfm_alg_blocksize(crypto_skcipher_tfm(tfm)); } +/** + * crypto_lskcipher_blocksize() - obtain block size of cipher + * @tfm: cipher handle + * + * The block size for the lskcipher referenced with the cipher handle is + * returned. 
The caller may use that information to allocate appropriate + * memory for the data returned by the encryption or decryption operation + * + * Return: block size of cipher + */ +static inline unsigned int crypto_lskcipher_blocksize( + struct crypto_lskcipher *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_lskcipher_tfm(tfm)); +} + static inline unsigned int crypto_skcipher_alg_chunksize( struct skcipher_alg *alg) { return alg->chunksize; } +static inline unsigned int crypto_lskcipher_alg_chunksize( + struct lskcipher_alg *alg) +{ + return alg->co.chunksize; +} + /** * crypto_skcipher_chunksize() - obtain chunk size * @tfm: cipher handle @@ -321,7 +499,24 @@ static inline unsigned int crypto_skcipher_alg_chunksize( static inline unsigned int crypto_skcipher_chunksize( struct crypto_skcipher *tfm) { - return crypto_skcipher_alg_chunksize(crypto_skcipher_alg(tfm)); + return crypto_skcipher_alg_common(tfm)->chunksize; +} + +/** + * crypto_lskcipher_chunksize() - obtain chunk size + * @tfm: cipher handle + * + * The block size is set to one for ciphers such as CTR. However, + * you still need to provide incremental updates in multiples of + * the underlying block size as the IV does not have sub-block + * granularity. This is known in this API as the chunk size. 
+ * + * Return: chunk size in bytes + */ +static inline unsigned int crypto_lskcipher_chunksize( + struct crypto_lskcipher *tfm) +{ + return crypto_lskcipher_alg_chunksize(crypto_lskcipher_alg(tfm)); } static inline unsigned int crypto_sync_skcipher_blocksize( @@ -336,6 +531,12 @@ static inline unsigned int crypto_skcipher_alignmask( return crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)); } +static inline unsigned int crypto_lskcipher_alignmask( + struct crypto_lskcipher *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_lskcipher_tfm(tfm)); +} + static inline u32 crypto_skcipher_get_flags(struct crypto_skcipher *tfm) { return crypto_tfm_get_flags(crypto_skcipher_tfm(tfm)); @@ -371,6 +572,23 @@ static inline void crypto_sync_skcipher_clear_flags( crypto_skcipher_clear_flags(&tfm->base, flags); } +static inline u32 crypto_lskcipher_get_flags(struct crypto_lskcipher *tfm) +{ + return crypto_tfm_get_flags(crypto_lskcipher_tfm(tfm)); +} + +static inline void crypto_lskcipher_set_flags(struct crypto_lskcipher *tfm, + u32 flags) +{ + crypto_tfm_set_flags(crypto_lskcipher_tfm(tfm), flags); +} + +static inline void crypto_lskcipher_clear_flags(struct crypto_lskcipher *tfm, + u32 flags) +{ + crypto_tfm_clear_flags(crypto_lskcipher_tfm(tfm), flags); +} + /** * crypto_skcipher_setkey() - set key for cipher * @tfm: cipher handle @@ -396,16 +614,47 @@ static inline int crypto_sync_skcipher_setkey(struct crypto_sync_skcipher *tfm, return crypto_skcipher_setkey(&tfm->base, key, keylen); } +/** + * crypto_lskcipher_setkey() - set key for cipher + * @tfm: cipher handle + * @key: buffer holding the key + * @keylen: length of the key in bytes + * + * The caller provided key is set for the lskcipher referenced by the cipher + * handle. + * + * Note, the key length determines the cipher type. Many block ciphers implement + * different cipher modes depending on the key size, such as AES-128 vs AES-192 + * vs. AES-256. 
When providing a 16 byte key for an AES cipher handle, AES-128 + * is performed. + * + * Return: 0 if the setting of the key was successful; < 0 if an error occurred + */ +int crypto_lskcipher_setkey(struct crypto_lskcipher *tfm, + const u8 *key, unsigned int keylen); + static inline unsigned int crypto_skcipher_min_keysize( struct crypto_skcipher *tfm) { - return crypto_skcipher_alg(tfm)->min_keysize; + return crypto_skcipher_alg_common(tfm)->min_keysize; } static inline unsigned int crypto_skcipher_max_keysize( struct crypto_skcipher *tfm) { - return crypto_skcipher_alg(tfm)->max_keysize; + return crypto_skcipher_alg_common(tfm)->max_keysize; +} + +static inline unsigned int crypto_lskcipher_min_keysize( + struct crypto_lskcipher *tfm) +{ + return crypto_lskcipher_alg(tfm)->co.min_keysize; +} + +static inline unsigned int crypto_lskcipher_max_keysize( + struct crypto_lskcipher *tfm) +{ + return crypto_lskcipher_alg(tfm)->co.max_keysize; } /** @@ -457,6 +706,42 @@ int crypto_skcipher_encrypt(struct skcipher_request *req); */ int crypto_skcipher_decrypt(struct skcipher_request *req); +/** + * crypto_lskcipher_encrypt() - encrypt plaintext + * @tfm: lskcipher handle + * @src: source buffer + * @dst: destination buffer + * @len: number of bytes to process + * @iv: IV for the cipher operation which must comply with the IV size defined + * by crypto_lskcipher_ivsize + * + * Encrypt plaintext data using the lskcipher handle. 
+ * + * Return: >=0 if the cipher operation was successful, if positive + * then this many bytes have been left unprocessed; + * < 0 if an error occurred + */ +int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, + u8 *dst, unsigned len, u8 *iv); + +/** + * crypto_lskcipher_decrypt() - decrypt ciphertext + * @tfm: lskcipher handle + * @src: source buffer + * @dst: destination buffer + * @len: number of bytes to process + * @iv: IV for the cipher operation which must comply with the IV size defined + * by crypto_lskcipher_ivsize + * + * Decrypt ciphertext data using the lskcipher handle. + * + * Return: >=0 if the cipher operation was successful, if positive + * then this many bytes have been left unprocessed; + * < 0 if an error occurred + */ +int crypto_lskcipher_decrypt(struct crypto_lskcipher *tfm, const u8 *src, + u8 *dst, unsigned len, u8 *iv); + /** * DOC: Symmetric Key Cipher Request Handle * diff --git a/include/linux/crypto.h b/include/linux/crypto.h index a0780deb017a..f3c3a3b27fac 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -24,6 +24,7 @@ #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 +#define CRYPTO_ALG_TYPE_LSKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_AKCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_SIG 0x00000007 -- cgit v1.2.3 From 2a86f1b56a30e242caf7ee1268af68f4f49ce847 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 20 Sep 2023 14:26:29 +0800 Subject: kasan: Cleanup the __HAVE_ARCH_SHADOW_MAP usage As Linus suggested, __HAVE_ARCH_XYZ is "stupid" and "having historical uses of it doesn't make it good". So migrate __HAVE_ARCH_SHADOW_MAP to separate macros named after the respective functions. 
Suggested-by: Linus Torvalds Reviewed-by: WANG Xuerui Reviewed-by: Andrey Konovalov Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kasan.h | 10 ++++++++-- include/linux/kasan.h | 2 +- mm/kasan/kasan.h | 8 +++----- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index deeff8158f45..a12ecab37da7 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -10,8 +10,6 @@ #include #include -#define __HAVE_ARCH_SHADOW_MAP - #define KASAN_SHADOW_SCALE_SHIFT 3 #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) @@ -68,6 +66,7 @@ static __always_inline bool kasan_arch_is_ready(void) return !kasan_early_stage; } +#define kasan_mem_to_shadow kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { if (!kasan_arch_is_ready()) { @@ -97,6 +96,7 @@ static inline void *kasan_mem_to_shadow(const void *addr) } } +#define kasan_shadow_to_mem kasan_shadow_to_mem static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { unsigned long addr = (unsigned long)shadow_addr; @@ -119,6 +119,12 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) } } +#define addr_has_metadata addr_has_metadata +static __always_inline bool addr_has_metadata(const void *addr) +{ + return (kasan_mem_to_shadow((void *)addr) != NULL); +} + void kasan_init(void); asmlinkage void kasan_early_init(void); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 3df5499f7936..842623d708c2 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -54,7 +54,7 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); -#ifndef __HAVE_ARCH_SHADOW_MAP +#ifndef kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) 
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index f70e3d7a602e..d37831b8511c 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -291,7 +291,7 @@ struct kasan_stack_ring { #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) -#ifndef __HAVE_ARCH_SHADOW_MAP +#ifndef kasan_shadow_to_mem static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) @@ -299,15 +299,13 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) } #endif +#ifndef addr_has_metadata static __always_inline bool addr_has_metadata(const void *addr) { -#ifdef __HAVE_ARCH_SHADOW_MAP - return (kasan_mem_to_shadow((void *)addr) != NULL); -#else return (kasan_reset_tag(addr) >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); -#endif } +#endif /** * kasan_check_range - Check memory region, and report if invalid access. -- cgit v1.2.3 From 653b7eb9d74426397c95061fd57da3063625af65 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Mon, 28 Aug 2023 14:20:00 +0300 Subject: net/mlx5: Bridge, Enable mcast in smfs steering mode In order to have mcast offloads the driver needs the following: It should know if that mcast comes from the wire port; in addition, the flow should not be marked as any specific source, so that the driver has the flexibility not to depend on the way the iterator is implemented in the FW.
Signed-off-by: Erez Shitrit Reviewed-by: Moshe Shemesh Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c | 11 ++--------- include/linux/mlx5/fs.h | 1 + 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c index 7a01714b3780..a7ed87e9d842 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c @@ -78,6 +78,8 @@ mlx5_esw_bridge_mdb_flow_create(u16 esw_owner_vhca_id, struct mlx5_esw_bridge_md xa_for_each(&entry->ports, idx, port) { dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dests[i].ft = port->mcast.ft; + if (port->vport_num == MLX5_VPORT_UPLINK) + dests[i].ft->flags |= MLX5_FLOW_TABLE_UPLINK_VPORT; i++; } @@ -585,10 +587,6 @@ mlx5_esw_bridge_mcast_vlan_flow_create(u16 vlan_proto, struct mlx5_esw_bridge_po if (!rule_spec) return ERR_PTR(-ENOMEM); - if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && - port->vport_num == MLX5_VPORT_UPLINK) - rule_spec->flow_context.flow_source = - MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; @@ -660,11 +658,6 @@ mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port) if (!rule_spec) return ERR_PTR(-ENOMEM); - if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) && - port->vport_num == MLX5_VPORT_UPLINK) - rule_spec->flow_context.flow_source = - MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; - if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; dest.vport.vhca_id = port->esw_owner_vhca_id; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 1e00c2436377..6f7725238abc 100644 
--- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -67,6 +67,7 @@ enum { MLX5_FLOW_TABLE_TERMINATION = BIT(2), MLX5_FLOW_TABLE_UNMANAGED = BIT(3), MLX5_FLOW_TABLE_OTHER_VPORT = BIT(4), + MLX5_FLOW_TABLE_UPLINK_VPORT = BIT(5), }; #define LEFTOVERS_RULE_NUM 2 -- cgit v1.2.3 From e0cc92fd945a9c8e43d4268cf1ea985d0e99a90f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 3 Aug 2023 16:39:40 +0300 Subject: net/mlx5: Add a health error syndrome for pci data poisoned Add new health error syndrome to indicate that pci data poisoned error has been received while fetching device ICM data. Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 ++ include/linux/mlx5/mlx5_ifc.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 2fb2598b775e..1c220048ae9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -365,6 +365,8 @@ static const char *hsynd_str(u8 synd) return "FFSER error"; case MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR: return "High temperature"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR: + return "ICM fetch PCI data poisoned error"; default: return "unrecognized error"; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dd8421d021cf..b23d8ff286a1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10574,6 +10574,7 @@ enum { MLX5_INITIAL_SEG_HEALTH_SYNDROME_EQ_INV = 0xe, MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR = 0x12, }; struct mlx5_ifc_initial_seg_bits { -- cgit v1.2.3 From 6b596e62ed9f90c4a97e68ae1f7b1af5beeb3c05 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: 
Fri, 8 Sep 2023 18:22:51 +0200 Subject: sched: Provide rt_mutex specific scheduler helpers With PREEMPT_RT there is a rt_mutex recursion problem where sched_submit_work() can use an rtlock (aka spinlock_t). More specifically what happens is: mutex_lock() /* really rt_mutex */ ... __rt_mutex_slowlock_locked() task_blocks_on_rt_mutex() // enqueue current task as waiter // do PI chain walk rt_mutex_slowlock_block() schedule() sched_submit_work() ... spin_lock() /* really rtlock */ ... __rt_mutex_slowlock_locked() task_blocks_on_rt_mutex() // enqueue current task as waiter *AGAIN* // *CONFUSION* Fix this by making rt_mutex do the sched_submit_work() early, before it enqueues itself as a waiter -- before it even knows *if* it will wait. [[ basically Thomas' patch but with different naming and a few asserts added ]] Originally-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230908162254.999499-5-bigeasy@linutronix.de --- include/linux/sched.h | 3 +++ include/linux/sched/rt.h | 4 ++++ kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++---- 3 files changed, 39 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..67623ffd4a8e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -911,6 +911,9 @@ struct task_struct { * ->sched_remote_wakeup gets used, so it can be in this word. 
*/ unsigned sched_remote_wakeup:1; +#ifdef CONFIG_RT_MUTEXES + unsigned sched_rt_mutex:1; +#endif /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 994c25640e15..b2b9e6eb9683 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) } #ifdef CONFIG_RT_MUTEXES +extern void rt_mutex_pre_schedule(void); +extern void rt_mutex_schedule(void); +extern void rt_mutex_post_schedule(void); + /* * Must hold either p->pi_lock or task_rq(p)->lock. */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1ea7ba53aad2..58d0346d1bb3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6723,9 +6723,6 @@ static inline void sched_submit_work(struct task_struct *tsk) static DEFINE_WAIT_OVERRIDE_MAP(sched_map, LD_WAIT_CONFIG); unsigned int task_flags; - if (task_is_running(tsk)) - return; - /* * Establish LD_WAIT_CONFIG context to ensure none of the code called * will use a blocking primitive -- which would lead to recursion. @@ -6783,7 +6780,12 @@ asmlinkage __visible void __sched schedule(void) { struct task_struct *tsk = current; - sched_submit_work(tsk); +#ifdef CONFIG_RT_MUTEXES + lockdep_assert(!tsk->sched_rt_mutex); +#endif + + if (!task_is_running(tsk)) + sched_submit_work(tsk); __schedule_loop(SM_NONE); sched_update_worker(tsk); } @@ -7044,6 +7046,32 @@ static void __setscheduler_prio(struct task_struct *p, int prio) #ifdef CONFIG_RT_MUTEXES +/* + * Would be more useful with typeof()/auto_type but they don't mix with + * bit-fields. Since it's a local thing, use int. Keep the generic sounding + * name such that if someone were to implement this function we get to compare + * notes. 
+ */ +#define fetch_and_set(x, v) ({ int _x = (x); (x) = (v); _x; }) + +void rt_mutex_pre_schedule(void) +{ + lockdep_assert(!fetch_and_set(current->sched_rt_mutex, 1)); + sched_submit_work(current); +} + +void rt_mutex_schedule(void) +{ + lockdep_assert(current->sched_rt_mutex); + __schedule_loop(SM_NONE); +} + +void rt_mutex_post_schedule(void) +{ + sched_update_worker(current); + lockdep_assert(fetch_and_set(current->sched_rt_mutex, 0)); +} + static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) { if (pi_task) -- cgit v1.2.3 From 6d2779ecaeb56f92d7105c56772346c71c88c278 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Sep 2023 18:14:29 +0100 Subject: locking/atomic: scripts: fix fallback ifdeffery Since commit: 9257959a6e5b4fca ("locking/atomic: scripts: restructure fallback ifdeffery") The ordering fallbacks for atomic*_read_acquire() and atomic*_set_release() erroneously fall back to the implicitly relaxed atomic*_read() and atomic*_set() variants respectively, without any additional barriers. This loses the ACQUIRE and RELEASE ordering semantics, which can result in a wide variety of problems, even on strongly-ordered architectures where the implementation of atomic*_read() and/or atomic*_set() allows the compiler to reorder those relative to other accesses. In practice this has been observed to break bit spinlocks on arm64, resulting in dentry cache corruption. The fallback logic was intended to allow ACQUIRE/RELEASE/RELAXED ops to be defined in terms of FULL ops, but where an op had RELAXED ordering by default, this unintentionally permitted the ACQUIRE/RELEASE ops to be defined in terms of the implicitly RELAXED default. This patch corrects the logic to avoid falling back to implicitly RELAXED ops, resulting in the same behaviour as prior to commit 9257959a6e5b4fca. I've verified the resulting assembly on arm64 by generating outlined wrappers of the atomics.
Prior to this patch the compiler generates sequences using relaxed load (LDR) and store (STR) instructions, e.g. | : | ldr x0, [x0] | ret | | : | str x1, [x0] | ret With this patch applied the compiler generates sequences using the intended load-acquire (LDAR) and store-release (STLR) instructions, e.g. | : | ldar x0, [x0] | ret | | : | stlr x1, [x0] | ret To make sure that there were no other victims of the ifdeffery rewrite, I generated outlined copies of all of the {atomic,atomic64,atomic_long} atomic operations before and after commit 9257959a6e5b4fca. A diff of the generated assembly on arm64 shows that only the read_acquire() and set_release() operations were changed, and only lost their intended ordering: | [mark@lakrids:~/src/linux]% diff -u \ | <(aarch64-linux-gnu-objdump -d before-9257959a6e5b4fca.o) | <(aarch64-linux-gnu-objdump -d after-9257959a6e5b4fca.o) | --- /proc/self/fd/11 2023-09-19 16:51:51.114779415 +0100 | +++ /proc/self/fd/16 2023-09-19 16:51:51.114779415 +0100 | @@ -1,5 +1,5 @@ | | -before-9257959a6e5b4fca.o: file format elf64-littleaarch64 | +after-9257959a6e5b4fca.o: file format elf64-littleaarch64 | | | Disassembly of section .text: | @@ -9,7 +9,7 @@ | 4: d65f03c0 ret | | 0000000000000008 : | - 8: 88dffc00 ldar w0, [x0] | + 8: b9400000 ldr w0, [x0] | c: d65f03c0 ret | | 0000000000000010 : | @@ -17,7 +17,7 @@ | 14: d65f03c0 ret | | 0000000000000018 : | - 18: 889ffc01 stlr w1, [x0] | + 18: b9000001 str w1, [x0] | 1c: d65f03c0 ret | | 0000000000000020 : | @@ -1230,7 +1230,7 @@ | 1070: d65f03c0 ret | | 0000000000001074 : | - 1074: c8dffc00 ldar x0, [x0] | + 1074: f9400000 ldr x0, [x0] | 1078: d65f03c0 ret | | 000000000000107c : | @@ -1238,7 +1238,7 @@ | 1080: d65f03c0 ret | | 0000000000001084 : | - 1084: c89ffc01 stlr x1, [x0] | + 1084: f9000001 str x1, [x0] | 1088: d65f03c0 ret | | 000000000000108c : | @@ -2427,7 +2427,7 @@ | 207c: d65f03c0 ret | | 0000000000002080 : | - 2080: c8dffc00 ldar x0, [x0] | + 2080: f9400000 ldr x0, [x0] | 2084: 
d65f03c0 ret | | 0000000000002088 : | @@ -2435,7 +2435,7 @@ | 208c: d65f03c0 ret | | 0000000000002090 : | - 2090: c89ffc01 stlr x1, [x0] | + 2090: f9000001 str x1, [x0] | 2094: d65f03c0 ret | | 0000000000002098 : I've build tested this with a variety of configs for alpha, arm, arm64, csky, i386, m68k, microblaze, mips, nios2, openrisc, powerpc, riscv, s390, sh, sparc, x86_64, and xtensa, for which I've seen no issues. I was unable to build test for ia64 and parisc due to existing build breakage in v6.6-rc2. Fixes: 9257959a6e5b4fca ("locking/atomic: scripts: restructure fallback ifdeffery") Reported-by: Ming Lei Reported-by: Darrick J. Wong Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Tested-by: Baokun Li Link: https://lkml.kernel.org/r/20230919171430.2697727-1-mark.rutland@arm.com --- include/linux/atomic/atomic-arch-fallback.h | 10 +--------- scripts/atomic/gen-atomic-fallback.sh | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index 18f5744dfb5d..b83ef19da13d 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -459,8 +459,6 @@ raw_atomic_read_acquire(const atomic_t *v) { #if defined(arch_atomic_read_acquire) return arch_atomic_read_acquire(v); -#elif defined(arch_atomic_read) - return arch_atomic_read(v); #else int ret; @@ -508,8 +506,6 @@ raw_atomic_set_release(atomic_t *v, int i) { #if defined(arch_atomic_set_release) arch_atomic_set_release(v, i); -#elif defined(arch_atomic_set) - arch_atomic_set(v, i); #else if (__native_word(atomic_t)) { smp_store_release(&(v)->counter, i); @@ -2575,8 +2571,6 @@ raw_atomic64_read_acquire(const atomic64_t *v) { #if defined(arch_atomic64_read_acquire) return arch_atomic64_read_acquire(v); -#elif defined(arch_atomic64_read) - return arch_atomic64_read(v); #else s64 ret; @@ -2624,8 +2618,6 @@ 
raw_atomic64_set_release(atomic64_t *v, s64 i) { #if defined(arch_atomic64_set_release) arch_atomic64_set_release(v, i); -#elif defined(arch_atomic64_set) - arch_atomic64_set(v, i); #else if (__native_word(atomic64_t)) { smp_store_release(&(v)->counter, i); @@ -4657,4 +4649,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 202b45c7db600ce36198eb1f1fc2c2d5268ace2d +// 2fdd6702823fa842f9cea57a002e6e4476ae780c diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh index c0c8a85d7c81..a45154cefa48 100755 --- a/scripts/atomic/gen-atomic-fallback.sh +++ b/scripts/atomic/gen-atomic-fallback.sh @@ -102,7 +102,7 @@ gen_proto_order_variant() fi # Allow ACQUIRE/RELEASE/RELAXED ops to be defined in terms of FULL ops - if [ ! -z "${order}" ]; then + if [ ! -z "${order}" ] && ! meta_is_implicitly_relaxed "${meta}"; then printf "#elif defined(arch_${basename})\n" printf "\t${retstmt}arch_${basename}(${args});\n" fi -- cgit v1.2.3 From 03a95cf233b5bdd65ddd4eca63cd4874b6b84d50 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Sat, 26 Aug 2023 13:53:03 +0100 Subject: firmware: arm_scmi: Simplify enable/disable clock operations SCMI clock enable/disable operations come in 2 different flavours which differ only in how the underlying SCMI transaction is carried out: atomic or not. Currently we expose such SCMI operations through 2 distinctly named wrappers, that, in turn, are wrapped into another couple of similarly and distinctly named callbacks inside SCMI clock driver user. Reduce the churn of duplicated wrappers by adding a param to SCMI clock enable/disable operations to ask for atomic operation while removing the _atomic version of such operations. No functional change.
CC: Michael Turquette CC: Stephen Boyd CC: linux-clk@vger.kernel.org Signed-off-by: Cristian Marussi Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/20230826125308.462328-2-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/clk/clk-scmi.c | 11 +++++++---- drivers/firmware/arm_scmi/clock.c | 24 ++++++------------------ include/linux/scmi_protocol.h | 9 ++++----- 3 files changed, 17 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-scmi.c b/drivers/clk/clk-scmi.c index 2c7a830ce308..b7a180b3443e 100644 --- a/drivers/clk/clk-scmi.c +++ b/drivers/clk/clk-scmi.c @@ -13,6 +13,9 @@ #include #include +#define NOT_ATOMIC false +#define ATOMIC true + static const struct scmi_clk_proto_ops *scmi_proto_clk_ops; struct scmi_clk { @@ -78,28 +81,28 @@ static int scmi_clk_enable(struct clk_hw *hw) { struct scmi_clk *clk = to_scmi_clk(hw); - return scmi_proto_clk_ops->enable(clk->ph, clk->id); + return scmi_proto_clk_ops->enable(clk->ph, clk->id, NOT_ATOMIC); } static void scmi_clk_disable(struct clk_hw *hw) { struct scmi_clk *clk = to_scmi_clk(hw); - scmi_proto_clk_ops->disable(clk->ph, clk->id); + scmi_proto_clk_ops->disable(clk->ph, clk->id, NOT_ATOMIC); } static int scmi_clk_atomic_enable(struct clk_hw *hw) { struct scmi_clk *clk = to_scmi_clk(hw); - return scmi_proto_clk_ops->enable_atomic(clk->ph, clk->id); + return scmi_proto_clk_ops->enable(clk->ph, clk->id, ATOMIC); } static void scmi_clk_atomic_disable(struct clk_hw *hw) { struct scmi_clk *clk = to_scmi_clk(hw); - scmi_proto_clk_ops->disable_atomic(clk->ph, clk->id); + scmi_proto_clk_ops->disable(clk->ph, clk->id, ATOMIC); } /* diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 96060bf90a24..447d29b5fc72 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -418,26 +418,16 @@ scmi_clock_config_set(const struct scmi_protocol_handle *ph, u32 clk_id, return ret; } -static int 
scmi_clock_enable(const struct scmi_protocol_handle *ph, u32 clk_id) +static int scmi_clock_enable(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic) { - return scmi_clock_config_set(ph, clk_id, CLOCK_ENABLE, false); + return scmi_clock_config_set(ph, clk_id, CLOCK_ENABLE, atomic); } -static int scmi_clock_disable(const struct scmi_protocol_handle *ph, u32 clk_id) +static int scmi_clock_disable(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic) { - return scmi_clock_config_set(ph, clk_id, 0, false); -} - -static int scmi_clock_enable_atomic(const struct scmi_protocol_handle *ph, - u32 clk_id) -{ - return scmi_clock_config_set(ph, clk_id, CLOCK_ENABLE, true); -} - -static int scmi_clock_disable_atomic(const struct scmi_protocol_handle *ph, - u32 clk_id) -{ - return scmi_clock_config_set(ph, clk_id, 0, true); + return scmi_clock_config_set(ph, clk_id, 0, atomic); } static int scmi_clock_count_get(const struct scmi_protocol_handle *ph) @@ -470,8 +460,6 @@ static const struct scmi_clk_proto_ops clk_proto_ops = { .rate_set = scmi_clock_rate_set, .enable = scmi_clock_enable, .disable = scmi_clock_disable, - .enable_atomic = scmi_clock_enable_atomic, - .disable_atomic = scmi_clock_disable_atomic, }; static int scmi_clk_rate_notify(const struct scmi_protocol_handle *ph, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index e6fe4f73ffe6..b4c631a8d0ac 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -90,11 +90,10 @@ struct scmi_clk_proto_ops { u64 *rate); int (*rate_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u64 rate); - int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*enable_atomic)(const struct scmi_protocol_handle *ph, u32 clk_id); - int (*disable_atomic)(const struct scmi_protocol_handle *ph, - u32 clk_id); + int (*enable)(const struct scmi_protocol_handle *ph, u32 clk_id, + 
bool atomic); + int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool atomic); }; /** -- cgit v1.2.3 From 34592bf0a5cb0681ce3d64de5598951768f43328 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Sat, 26 Aug 2023 13:53:05 +0100 Subject: firmware: arm_scmi: Add v3.2 clock CONFIG_GET support Add support for v3.2 clock CONFIG_GET command and related new clock protocol operation state_get() to retrieve the status of a clock. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20230826125308.462328-4-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 64 +++++++++++++++++++++++++++++++++++++++ include/linux/scmi_protocol.h | 3 ++ 2 files changed, 67 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 63bd043365cd..aaa95624493d 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -21,6 +21,7 @@ enum scmi_clock_protocol_cmd { CLOCK_NAME_GET = 0x8, CLOCK_RATE_NOTIFY = 0x9, CLOCK_RATE_CHANGE_REQUESTED_NOTIFY = 0xA, + CLOCK_CONFIG_GET = 0xB, }; enum clk_state { @@ -59,6 +60,19 @@ struct scmi_msg_clock_config_set_v21 { __le32 oem_config_val; }; +struct scmi_msg_clock_config_get { + __le32 id; + __le32 flags; +#define REGMASK_OEM_TYPE_GET GENMASK(7, 0) +}; + +struct scmi_msg_resp_clock_config_get { + __le32 attributes; + __le32 config; +#define IS_CLK_ENABLED(x) le32_get_bits((x), BIT(0)) + __le32 oem_config_val; +}; + struct scmi_msg_clock_describe_rates { __le32 id; __le32 rate_index; @@ -496,6 +510,55 @@ static int scmi_clock_disable(const struct scmi_protocol_handle *ph, u32 clk_id, NULL_OEM_TYPE, 0, atomic); } +static int +scmi_clock_config_get(const struct scmi_protocol_handle *ph, u32 clk_id, + u8 oem_type, u32 *attributes, bool *enabled, + u32 *oem_val, bool atomic) +{ + int ret; + u32 flags; + struct scmi_xfer *t; + struct scmi_msg_clock_config_get *cfg; + + ret = 
ph->xops->xfer_get_init(ph, CLOCK_CONFIG_GET, + sizeof(*cfg), 0, &t); + if (ret) + return ret; + + t->hdr.poll_completion = atomic; + + flags = FIELD_PREP(REGMASK_OEM_TYPE_GET, oem_type); + + cfg = t->tx.buf; + cfg->id = cpu_to_le32(clk_id); + cfg->flags = cpu_to_le32(flags); + + ret = ph->xops->do_xfer(ph, t); + if (!ret) { + struct scmi_msg_resp_clock_config_get *resp = t->rx.buf; + + if (attributes) + *attributes = le32_to_cpu(resp->attributes); + + if (enabled) + *enabled = IS_CLK_ENABLED(resp->config); + + if (oem_val && oem_type) + *oem_val = le32_to_cpu(resp->oem_config_val); + } + + ph->xops->xfer_put(ph, t); + + return ret; +} + +static int scmi_clock_state_get(const struct scmi_protocol_handle *ph, + u32 clk_id, bool *enabled, bool atomic) +{ + return scmi_clock_config_get(ph, clk_id, NULL_OEM_TYPE, NULL, + enabled, NULL, atomic); +} + static int scmi_clock_count_get(const struct scmi_protocol_handle *ph) { struct clock_info *ci = ph->get_priv(ph); @@ -526,6 +589,7 @@ static const struct scmi_clk_proto_ops clk_proto_ops = { .rate_set = scmi_clock_rate_set, .enable = scmi_clock_enable, .disable = scmi_clock_disable, + .state_get = scmi_clock_state_get, }; static int scmi_clk_rate_notify(const struct scmi_protocol_handle *ph, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index b4c631a8d0ac..d11ca4286d57 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -80,6 +80,7 @@ struct scmi_protocol_handle; * @rate_set: set the clock rate of a clock * @enable: enables the specified clock * @disable: disables the specified clock + * @state_get: get the status of the specified clock */ struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); @@ -94,6 +95,8 @@ struct scmi_clk_proto_ops { bool atomic); int (*disable)(const struct scmi_protocol_handle *ph, u32 clk_id, bool atomic); + int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + bool *enabled, bool atomic); }; 
/** -- cgit v1.2.3 From 141b4fa0362569138653cf0165d92d48576db3fa Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Sat, 26 Aug 2023 13:53:08 +0100 Subject: firmware: arm_scmi: Add clock OEM config clock operations Expose a couple of new SCMI clock operations to get and set OEM specific clock configurations when talking to an SCMI v3.2 compliant platform. Issuing such requests against an SCMI platform server not supporting the v3.2 extension for OEM specific clock configurations will fail. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20230826125308.462328-7-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 22 ++++++++++++++++++++++ include/linux/scmi_protocol.h | 7 +++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 333d08822f77..d18bf789fc24 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -594,6 +594,26 @@ static int scmi_clock_state_get(const struct scmi_protocol_handle *ph, enabled, NULL, atomic); } +static int scmi_clock_config_oem_set(const struct scmi_protocol_handle *ph, + u32 clk_id, u8 oem_type, u32 oem_val, + bool atomic) +{ + struct clock_info *ci = ph->get_priv(ph); + + return ci->clock_config_set(ph, clk_id, CLK_STATE_UNCHANGED, + oem_type, oem_val, atomic); +} + +static int scmi_clock_config_oem_get(const struct scmi_protocol_handle *ph, + u32 clk_id, u8 oem_type, u32 *oem_val, + u32 *attributes, bool atomic) +{ + struct clock_info *ci = ph->get_priv(ph); + + return ci->clock_config_get(ph, clk_id, oem_type, attributes, + NULL, oem_val, atomic); +} + static int scmi_clock_count_get(const struct scmi_protocol_handle *ph) { struct clock_info *ci = ph->get_priv(ph); @@ -625,6 +645,8 @@ static const struct scmi_clk_proto_ops clk_proto_ops = { .enable = scmi_clock_enable, .disable = scmi_clock_disable, .state_get = scmi_clock_state_get, + .config_oem_get = 
scmi_clock_config_oem_get, + .config_oem_set = scmi_clock_config_oem_set, }; static int scmi_clk_rate_notify(const struct scmi_protocol_handle *ph, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index d11ca4286d57..e09ac428fa1b 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -81,6 +81,8 @@ struct scmi_protocol_handle; * @enable: enables the specified clock * @disable: disables the specified clock * @state_get: get the status of the specified clock + * @config_oem_get: get the value of an OEM specific clock config + * @config_oem_set: set the value of an OEM specific clock config */ struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); @@ -97,6 +99,11 @@ struct scmi_clk_proto_ops { bool atomic); int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id, bool *enabled, bool atomic); + int (*config_oem_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + u8 oem_type, u32 *oem_val, u32 *attributes, + bool atomic); + int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id, + u8 oem_type, u32 oem_val, bool atomic); }; /** -- cgit v1.2.3 From 647aa768281f38cb1002edb3a1f673c3d66a8d81 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 20 Sep 2023 16:40:13 +0200 Subject: Revert "fs: add infrastructure for multigrain timestamps" This reverts commit ffb6cf19e06334062744b7e3493f71e500964f8e. Users reported regressions due to enabling multi-grained timestamps unconditionally. As no clear consensus on a solution has come up and the discussion has gone back to the drawing board revert the infrastructure changes for. If it isn't code that's here to stay, make it go away. 
Message-ID: <20230920-keine-eile-c9755b5825db@brauner> Acked-by: Jan Kara Acked-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/inode.c | 82 ++---------------------------------------------------- fs/stat.c | 41 ++------------------------- include/linux/fs.h | 46 ++---------------------------- 3 files changed, 7 insertions(+), 162 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 35fd688168c5..84bc3c76e5cc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2102,52 +2102,10 @@ int file_remove_privs(struct file *file) } EXPORT_SYMBOL(file_remove_privs); -/** - * current_mgtime - Return FS time (possibly fine-grained) - * @inode: inode. - * - * Return the current time truncated to the time granularity supported by - * the fs, as suitable for a ctime/mtime change. If the ctime is flagged - * as having been QUERIED, get a fine-grained timestamp. - */ -struct timespec64 current_mgtime(struct inode *inode) -{ - struct timespec64 now, ctime; - atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; - long nsec = atomic_long_read(pnsec); - - if (nsec & I_CTIME_QUERIED) { - ktime_get_real_ts64(&now); - return timestamp_truncate(now, inode); - } - - ktime_get_coarse_real_ts64(&now); - now = timestamp_truncate(now, inode); - - /* - * If we've recently fetched a fine-grained timestamp - * then the coarse-grained one may still be earlier than the - * existing ctime. Just keep the existing value if so. 
- */ - ctime = inode_get_ctime(inode); - if (timespec64_compare(&ctime, &now) > 0) - now = ctime; - - return now; -} -EXPORT_SYMBOL(current_mgtime); - -static struct timespec64 current_ctime(struct inode *inode) -{ - if (is_mgtime(inode)) - return current_mgtime(inode); - return current_time(inode); -} - static int inode_needs_update_time(struct inode *inode) { int sync_it = 0; - struct timespec64 now = current_ctime(inode); + struct timespec64 now = current_time(inode); struct timespec64 ctime; /* First try to exhaust all avenues to not sync */ @@ -2578,43 +2536,9 @@ EXPORT_SYMBOL(current_time); */ struct timespec64 inode_set_ctime_current(struct inode *inode) { - struct timespec64 now; - struct timespec64 ctime; - - ctime.tv_nsec = READ_ONCE(inode->__i_ctime.tv_nsec); - if (!(ctime.tv_nsec & I_CTIME_QUERIED)) { - now = current_time(inode); + struct timespec64 now = current_time(inode); - /* Just copy it into place if it's not multigrain */ - if (!is_mgtime(inode)) { - inode_set_ctime_to_ts(inode, now); - return now; - } - - /* - * If we've recently updated with a fine-grained timestamp, - * then the coarse-grained one may still be earlier than the - * existing ctime. Just keep the existing value if so. - */ - ctime.tv_sec = inode->__i_ctime.tv_sec; - if (timespec64_compare(&ctime, &now) > 0) - return ctime; - - /* - * Ctime updates are usually protected by the inode_lock, but - * we can still race with someone setting the QUERIED flag. - * Try to swap the new nsec value into place. If it's changed - * in the interim, then just go with a fine-grained timestamp. 
- */ - if (cmpxchg(&inode->__i_ctime.tv_nsec, ctime.tv_nsec, - now.tv_nsec) != ctime.tv_nsec) - goto fine_grained; - inode->__i_ctime.tv_sec = now.tv_sec; - return now; - } -fine_grained: - ktime_get_real_ts64(&now); - inode_set_ctime_to_ts(inode, timestamp_truncate(now, inode)); + inode_set_ctime(inode, now.tv_sec, now.tv_nsec); return now; } EXPORT_SYMBOL(inode_set_ctime_current); diff --git a/fs/stat.c b/fs/stat.c index 6e60389d6a15..d43a5cc1bfa4 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -26,37 +26,6 @@ #include "internal.h" #include "mount.h" -/** - * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED - * @stat: where to store the resulting values - * @request_mask: STATX_* values requested - * @inode: inode from which to grab the c/mtime - * - * Given @inode, grab the ctime and mtime out if it and store the result - * in @stat. When fetching the value, flag it as queried so the next write - * will use a fine-grained timestamp. - */ -void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode) -{ - atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; - - /* If neither time was requested, then don't report them */ - if (!(request_mask & (STATX_CTIME|STATX_MTIME))) { - stat->result_mask &= ~(STATX_CTIME|STATX_MTIME); - return; - } - - stat->mtime = inode->i_mtime; - stat->ctime.tv_sec = inode->__i_ctime.tv_sec; - /* - * Atomically set the QUERIED flag and fetch the new value with - * the flag masked off. 
- */ - stat->ctime.tv_nsec = atomic_long_fetch_or(I_CTIME_QUERIED, pnsec) & - ~I_CTIME_QUERIED; -} -EXPORT_SYMBOL(fill_mg_cmtime); - /** * generic_fillattr - Fill in the basic attributes from the inode struct * @idmap: idmap of the mount the inode was found from @@ -89,14 +58,8 @@ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode->i_atime; - - if (is_mgtime(inode)) { - fill_mg_cmtime(stat, request_mask, inode); - } else { - stat->mtime = inode->i_mtime; - stat->ctime = inode_get_ctime(inode); - } - + stat->mtime = inode->i_mtime; + stat->ctime = inode_get_ctime(inode); stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4aeb3fa11927..b528f063e8ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1508,47 +1508,18 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, kgid_has_mapping(fs_userns, kgid); } -struct timespec64 current_mgtime(struct inode *inode); struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); -/* - * Multigrain timestamps - * - * Conditionally use fine-grained ctime and mtime timestamps when there - * are users actively observing them via getattr. The primary use-case - * for this is NFS clients that use the ctime to distinguish between - * different states of the file, and that are often fooled by multiple - * operations that occur in the same coarse-grained timer tick. - * - * The kernel always keeps normalized struct timespec64 values in the ctime, - * which means that only the first 30 bits of the value are used. Use the - * 31st bit of the ctime's tv_nsec field as a flag to indicate that the value - * has been queried since it was last updated. 
- */ -#define I_CTIME_QUERIED (1L<<30) - /** * inode_get_ctime - fetch the current ctime from the inode * @inode: inode from which to fetch ctime * - * Grab the current ctime tv_nsec field from the inode, mask off the - * I_CTIME_QUERIED flag and return it. This is mostly intended for use by - * internal consumers of the ctime that aren't concerned with ensuring a - * fine-grained update on the next change (e.g. when preparing to store - * the value in the backing store for later retrieval). - * - * This is safe to call regardless of whether the underlying filesystem - * is using multigrain timestamps. + * Grab the current ctime from the inode and return it. */ static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - struct timespec64 ctime; - - ctime.tv_sec = inode->__i_ctime.tv_sec; - ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED; - - return ctime; + return inode->__i_ctime; } /** @@ -2334,7 +2305,6 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ -#define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2358,17 +2328,6 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -/** - * is_mgtime: is this inode using multigrain timestamps - * @inode: inode to test for multigrain timestamps - * - * Return true if the inode uses multigrain timestamps, false otherwise. 
- */ -static inline bool is_mgtime(const struct inode *inode) -{ - return inode->i_sb->s_type->fs_flags & FS_MGTIME; -} - extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -3054,7 +3013,6 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); -void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); -- cgit v1.2.3 From 41b43b6c6e30a832c790b010a06772e793bca193 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 20 Sep 2023 12:46:27 +0200 Subject: locking/seqlock: Do the lockdep annotation before locking in do_write_seqcount_begin_nested() It was brought up by Tetsuo that the following sequence: write_seqlock_irqsave() printk_deferred_enter() could lead to a deadlock if the lockdep annotation within write_seqlock_irqsave() triggers. The problem is that the sequence counter is incremented before the lockdep annotation is performed. The lockdep splat would then attempt to invoke printk() but the reader side, of the same seqcount, could have a tty_port::lock acquired waiting for the sequence number to become even again. The other lockdep annotations come before the actual locking because "we want to see the locking error before it happens". There is no reason why seqcount should be different here. Do the lockdep annotation first then perform the locking operation (the sequence increment). 
Fixes: 1ca7d67cf5d5a ("seqcount: Add lockdep functionality to seqcount/seqlock structures") Reported-by: Tetsuo Handa Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230920104627._DTHgPyA@linutronix.de Closes: https://lore.kernel.org/20230621130641.-5iueY1I@linutronix.de --- include/linux/seqlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 987a59d977c5..e9bd2f65d7f4 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -512,8 +512,8 @@ do { \ static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) { - do_raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); + do_raw_write_seqcount_begin(s); } /** -- cgit v1.2.3 From 87c3a5893e865739ce78aa7192d36011022e0af7 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Fri, 15 Sep 2023 15:47:11 +1000 Subject: sched/core: Optimize in_task() and in_interrupt() a bit Except on x86, preempt_count is always accessed with READ_ONCE(). Repeated invocations in macros like irq_count() produce repeated loads. These redundant instructions appear in various fast paths. In the one shown below, for example, irq_count() is evaluated during kernel entry if !tick_nohz_full_cpu(smp_processor_id()). 0001ed0a : 1ed0a: 4e56 0000 linkw %fp,#0 1ed0e: 200f movel %sp,%d0 1ed10: 0280 ffff e000 andil #-8192,%d0 1ed16: 2040 moveal %d0,%a0 1ed18: 2028 0008 movel %a0@(8),%d0 1ed1c: 0680 0001 0000 addil #65536,%d0 1ed22: 2140 0008 movel %d0,%a0@(8) 1ed26: 082a 0001 000f btst #1,%a2@(15) 1ed2c: 670c beqs 1ed3a 1ed2e: 2028 0008 movel %a0@(8),%d0 1ed32: 2028 0008 movel %a0@(8),%d0 1ed36: 2028 0008 movel %a0@(8),%d0 1ed3a: 4e5e unlk %fp 1ed3c: 4e75 rts This patch doesn't prevent the pointless btst and beqs instructions above, but it does eliminate 2 of the 3 pointless move instructions here and elsewhere. 
On x86, preempt_count is per-cpu data and the problem does not arise presumably because the compiler is free to optimize more effectively. This patch was tested on m68k and x86. I was expecting no changes to object code for x86 and mostly that's what I saw. However, there were a few places where code generation was perturbed for some reason. The performance issue addressed here is minor on uniprocessor m68k. I got a 0.01% improvement from this patch for a simple "find /sys -false" benchmark. For architectures and workloads susceptible to cache line bounce the improvement is expected to be larger. The only SMP architecture I have is x86, and as x86 is unaffected I have not done any further measurements. Fixes: 15115830c887 ("preempt: Cleanup the macro maze a bit") Signed-off-by: Finn Thain Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/0a403120a682a525e6db2d81d1a3ffcc137c3742.1694756831.git.fthain@linux-m68k.org --- include/linux/preempt.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 1424670df161..9aa6358a1a16 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -99,14 +99,21 @@ static __always_inline unsigned char interrupt_context_level(void) return level; } +/* + * These macro definitions avoid redundant invocations of preempt_count() + * because such invocations would result in redundant loads given that + * preempt_count() is commonly implemented with READ_ONCE(). 
+ */ + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT # define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +# define irq_count() ((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | softirq_count()) #else # define softirq_count() (preempt_count() & SOFTIRQ_MASK) +# define irq_count() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK)) #endif -#define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* * Macros to retrieve the current execution context: @@ -119,7 +126,11 @@ static __always_inline unsigned char interrupt_context_level(void) #define in_nmi() (nmi_count()) #define in_hardirq() (hardirq_count()) #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) -#define in_task() (!(in_nmi() | in_hardirq() | in_serving_softirq())) +#ifdef CONFIG_PREEMPT_RT +# define in_task() (!((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | in_serving_softirq())) +#else +# define in_task() (!(preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +#endif /* * The following macros are deprecated and should not be used in new code: -- cgit v1.2.3 From 3ba78da711940ce07c39c4cdd1f4ad284067a42d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 6 Jun 2021 13:27:15 +0200 Subject: sched/headers: Add header guard to <linux/sched/deadline.h> It's the only non-trivial header in include/linux/sched/ missing a header guard. 
Signed-off-by: Ingo Molnar --- include/linux/sched/deadline.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 7c83d4d5a971..df3aca89d4f5 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -34,3 +36,5 @@ extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); #endif /* CONFIG_SMP */ + +#endif /* _LINUX_SCHED_DEADLINE_H */ -- cgit v1.2.3 From 6eddb116dd830436afbd922568292867de6c8b9e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Sep 2023 11:24:17 +0200 Subject: sched/headers: Standardize the header guard name Use the same _LINUX_SCHED_ prefix nomenclature as the other 29 header guards in include/linux/sched/ do. 
Signed-off-by: Ingo Molnar --- include/linux/sched/vhost_task.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index 837a23624a66..bc60243d43b3 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_VHOST_TASK_H -#define _LINUX_VHOST_TASK_H - +#ifndef _LINUX_SCHED_VHOST_TASK_H +#define _LINUX_SCHED_VHOST_TASK_H struct vhost_task; @@ -11,4 +10,4 @@ void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); void vhost_task_wake(struct vhost_task *vtsk); -#endif +#endif /* _LINUX_SCHED_VHOST_TASK_H */ -- cgit v1.2.3 From 0f9a1a4d234c064d8dff69cf3f3755554dd479ec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Sep 2023 11:27:37 +0200 Subject: sched/headers: Standardize the header guard #endif Signed-off-by: Ingo Molnar --- include/linux/sched/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h index 3c3e049224ae..969aaf5ef9d6 100644 --- a/include/linux/sched/types.h +++ b/include/linux/sched/types.h @@ -20,4 +20,4 @@ struct task_cputime { unsigned long long sum_exec_runtime; }; -#endif +#endif /* _LINUX_SCHED_TYPES_H */ -- cgit v1.2.3 From 1632d47fae2f2d229dd432854c4443ebb0bb27a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Sep 2023 11:28:48 +0200 Subject: sched/headers: Standardize the header guard #endif Signed-off-by: Ingo Molnar --- include/linux/sched/smt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index 59d3736c454c..fb1e295e7e63 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -17,4 +17,4 @@ static inline bool sched_smt_active(void) { return false; } 
void arch_smt_update(void); -#endif +#endif /* _LINUX_SCHED_SMT_H */ -- cgit v1.2.3 From 0df7cd3c13e44d01f9f28e29cbce74e2931b00fe Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sat, 16 Sep 2023 16:09:15 +0300 Subject: vsock/virtio/vhost: read data from non-linear skb This is a preparation patch for MSG_ZEROCOPY support. It adds handling of non-linear skbs by replacing direct calls of 'memcpy_to_msg()' with 'skb_copy_datagram_iter()'. The main advantage of the latter is that it can handle the paged part of the skb by using 'kmap()' on each page, but if there are no pages in the skb, it behaves like simple copying to an iov iterator. This patch also adds a new field to the control block of the skb - this value shows the current offset in the skb from which to read the next portion of data (regardless of whether the skb is linear or not). The idea behind this field is that 'skb_copy_datagram_iter()' handles both types of skb internally - it just needs an offset from which to copy data from the given skb. This offset is incremented on each read from the skb. This approach simplifies the handling of both linear and non-linear skbs, because for a linear skb we need to call 'skb_pull()' after reading data from it, while in the non-linear case we need to update 'data_len'. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. 
Tsirkin Signed-off-by: Paolo Abeni --- drivers/vhost/vsock.c | 14 +++++++++----- include/linux/virtio_vsock.h | 1 + net/vmw_vsock/virtio_transport_common.c | 32 +++++++++++++++++++------------- 3 files changed, 29 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 817d377a3f36..83711aad855c 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -114,6 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct sk_buff *skb; unsigned out, in; size_t nbytes; + u32 offset; int head; skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); @@ -156,7 +157,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, } iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len); - payload_len = skb->len; + offset = VIRTIO_VSOCK_SKB_CB(skb)->offset; + payload_len = skb->len - offset; hdr = virtio_vsock_hdr(skb); /* If the packet is greater than the space available in the @@ -197,8 +199,10 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - nbytes = copy_to_iter(skb->data, payload_len, &iov_iter); - if (nbytes != payload_len) { + if (skb_copy_datagram_iter(skb, + offset, + &iov_iter, + payload_len)) { kfree_skb(skb); vq_err(vq, "Faulted on copying pkt buf\n"); break; @@ -212,13 +216,13 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, vhost_add_used(vq, head, sizeof(*hdr) + payload_len); added = true; - skb_pull(skb, payload_len); + VIRTIO_VSOCK_SKB_CB(skb)->offset += payload_len; total_len += payload_len; /* If we didn't send all the payload we can requeue the packet * to send it with the next available buffer. 
*/ - if (skb->len > 0) { + if (VIRTIO_VSOCK_SKB_CB(skb)->offset < skb->len) { hdr->flags |= cpu_to_le32(flags_to_restore); /* We are queueing the same skb to handle diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c58453699ee9..a91fbdf233e4 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -12,6 +12,7 @@ struct virtio_vsock_skb_cb { bool reply; bool tap_delivered; + u32 offset; }; #define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 352d042b130b..3e08d52a9355 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -364,9 +364,10 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk, spin_unlock_bh(&vvs->rx_lock); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) goto out; @@ -410,25 +411,27 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, while (total < len && !skb_queue_empty(&vvs->rx_queue)) { skb = skb_peek(&vvs->rx_queue); - bytes = len - total; - if (bytes > skb->len) - bytes = skb->len; + bytes = min_t(size_t, len - total, + skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. 
*/ spin_unlock_bh(&vvs->rx_lock); - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, + VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) goto out; spin_lock_bh(&vvs->rx_lock); total += bytes; - skb_pull(skb, bytes); - if (skb->len == 0) { + VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; + + if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); virtio_transport_dec_rx_pkt(vvs, pkt_len); @@ -492,9 +495,10 @@ virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk, spin_unlock_bh(&vvs->rx_lock); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) return err; @@ -553,11 +557,13 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, int err; /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ spin_unlock_bh(&vvs->rx_lock); - err = memcpy_to_msg(msg, skb->data, bytes_to_copy); + err = skb_copy_datagram_iter(skb, 0, + &msg->msg_iter, + bytes_to_copy); if (err) { /* Copy of message failed. Rest of * fragments will be freed without copy. -- cgit v1.2.3 From 581512a6dc939ef122e49336626ae159f3b8a345 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Sat, 16 Sep 2023 16:09:18 +0300 Subject: vsock/virtio: MSG_ZEROCOPY flag support This adds handling of MSG_ZEROCOPY flag on transmission path: 1) If this flag is set and zerocopy transmission is possible (enabled in socket options and transport allows zerocopy), then non-linear skb will be created and filled with the pages of user's buffer. Pages of user's buffer are locked in memory by 'get_user_pages()'. 
2) Changes how the skb owner is set: instead of 'skb_set_owner_sk_safe()' it calls 'skb_set_owner_w()'. The reason for this change is that '__zerocopy_sg_from_iter()' increments 'sk_wmem_alloc' of the socket, so to decrease this field correctly, a proper skb destructor is needed: 'sock_wfree()'. This destructor is set by 'skb_set_owner_w()'. 3) Adds a new callback to 'struct virtio_transport': 'can_msgzerocopy'. If this callback is set, then the transport needs an extra check to be able to send the provided number of buffers in zerocopy mode. Currently, the only transport that needs this callback set is virtio, because this transport adds new buffers to the virtio queue and we need to check that the number of these buffers is less than the size of the queue (this is required by the virtio spec). vhost and loopback transports don't need this check. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- include/linux/virtio_vsock.h | 9 + .../trace/events/vsock_virtio_transport_common.h | 12 +- net/vmw_vsock/virtio_transport.c | 32 +++ net/vmw_vsock/virtio_transport_common.c | 250 ++++++++++++++++----- 4 files changed, 241 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index a91fbdf233e4..ebb3ce63d64d 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -160,6 +160,15 @@ struct virtio_transport { /* Takes ownership of the packet */ int (*send_pkt)(struct sk_buff *skb); + + /* Used in MSG_ZEROCOPY mode. Checks, that provided data + * (number of buffers) could be transmitted with zerocopy + * mode. If this callback is not implemented for the current + * transport - this means that this transport doesn't need + * extra checks and can perform zerocopy transmission by + * default.
+ */ + bool (*can_msgzerocopy)(int bufs_num); }; ssize_t diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h index d0b3f0ea9ba1..f1ebe36787c3 100644 --- a/include/trace/events/vsock_virtio_transport_common.h +++ b/include/trace/events/vsock_virtio_transport_common.h @@ -43,7 +43,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt, __u32 len, __u16 type, __u16 op, - __u32 flags + __u32 flags, + bool zcopy ), TP_ARGS( src_cid, src_port, @@ -51,7 +52,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt, len, type, op, - flags + flags, + zcopy ), TP_STRUCT__entry( __field(__u32, src_cid) @@ -62,6 +64,7 @@ TRACE_EVENT(virtio_transport_alloc_pkt, __field(__u16, type) __field(__u16, op) __field(__u32, flags) + __field(bool, zcopy) ), TP_fast_assign( __entry->src_cid = src_cid; @@ -72,14 +75,15 @@ TRACE_EVENT(virtio_transport_alloc_pkt, __entry->type = type; __entry->op = op; __entry->flags = flags; + __entry->zcopy = zcopy; ), - TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x", + TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x zcopy=%s", __entry->src_cid, __entry->src_port, __entry->dst_cid, __entry->dst_port, __entry->len, show_type(__entry->type), show_op(__entry->op), - __entry->flags) + __entry->flags, __entry->zcopy ? 
"true" : "false") ); TRACE_EVENT(virtio_transport_recv_pkt, diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 73d730156349..09ba3128e759 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -455,6 +455,37 @@ static void virtio_vsock_rx_done(struct virtqueue *vq) queue_work(virtio_vsock_workqueue, &vsock->rx_work); } +static bool virtio_transport_can_msgzerocopy(int bufs_num) +{ + struct virtio_vsock *vsock; + bool res = false; + + rcu_read_lock(); + + vsock = rcu_dereference(the_virtio_vsock); + if (vsock) { + struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX]; + + /* Check that tx queue is large enough to keep whole + * data to send. This is needed, because when there is + * not enough free space in the queue, current skb to + * send will be reinserted to the head of tx list of + * the socket to retry transmission later, so if skb + * is bigger than whole queue, it will be reinserted + * again and again, thus blocking other skbs to be sent. + * Each page of the user provided buffer will be added + * as a single buffer to the tx virtqueue, so compare + * number of pages against maximum capacity of the queue. 
+ */ + if (bufs_num <= vq->num_max) + res = true; + } + + rcu_read_unlock(); + + return res; +} + static bool virtio_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport virtio_transport = { @@ -504,6 +535,7 @@ static struct virtio_transport virtio_transport = { }, .send_pkt = virtio_transport_send_pkt, + .can_msgzerocopy = virtio_transport_can_msgzerocopy, }; static bool virtio_transport_seqpacket_allow(u32 remote_cid) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 3a48e48a99ac..e22c81435ef7 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -37,73 +37,99 @@ virtio_transport_get_ops(struct vsock_sock *vsk) return container_of(t, struct virtio_transport, transport); } -/* Returns a new packet on success, otherwise returns NULL. - * - * If NULL is returned, errp is set to a negative errno. - */ -static struct sk_buff * -virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port) -{ - const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len; - struct virtio_vsock_hdr *hdr; - struct sk_buff *skb; - void *payload; - int err; +static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, + struct virtio_vsock_pkt_info *info, + size_t pkt_len) +{ + struct iov_iter *iov_iter; - skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); - if (!skb) - return NULL; + if (!info->msg) + return false; - hdr = virtio_vsock_hdr(skb); - hdr->type = cpu_to_le16(info->type); - hdr->op = cpu_to_le16(info->op); - hdr->src_cid = cpu_to_le64(src_cid); - hdr->dst_cid = cpu_to_le64(dst_cid); - hdr->src_port = cpu_to_le32(src_port); - hdr->dst_port = cpu_to_le32(dst_port); - hdr->flags = cpu_to_le32(info->flags); - hdr->len = cpu_to_le32(len); + iov_iter = &info->msg->msg_iter; - if (info->msg && len > 0) { - payload = skb_put(skb, len); - err = memcpy_from_msg(payload, info->msg, 
len); - if (err) - goto out; + if (iov_iter->iov_offset) + return false; - if (msg_data_left(info->msg) == 0 && - info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + /* We can't send whole iov. */ + if (iov_iter->count > pkt_len) + return false; - if (info->msg->msg_flags & MSG_EOR) - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); - } + /* Check that transport can send data in zerocopy mode. */ + t_ops = virtio_transport_get_ops(info->vsk); + + if (t_ops->can_msgzerocopy) { + int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); + int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS); + + /* +1 is for packet header. */ + return t_ops->can_msgzerocopy(pages_to_send + 1); } - if (info->reply) - virtio_vsock_skb_set_reply(skb); + return true; +} - trace_virtio_transport_alloc_pkt(src_cid, src_port, - dst_cid, dst_port, - len, - info->type, - info->op, - info->flags); +static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, + struct sk_buff *skb, + struct msghdr *msg, + bool zerocopy) +{ + struct ubuf_info *uarg; - if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) { - WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n"); - goto out; + if (msg->msg_ubuf) { + uarg = msg->msg_ubuf; + net_zcopy_get(uarg); + } else { + struct iov_iter *iter = &msg->msg_iter; + struct ubuf_info_msgzc *uarg_zc; + + uarg = msg_zerocopy_realloc(sk_vsock(vsk), + iter->count, + NULL); + if (!uarg) + return -1; + + uarg_zc = uarg_to_msgzc(uarg); + uarg_zc->zerocopy = zerocopy ? 
1 : 0; } - return skb; + skb_zcopy_init(skb, uarg); -out: - kfree_skb(skb); - return NULL; + return 0; +} + +static int virtio_transport_fill_skb(struct sk_buff *skb, + struct virtio_vsock_pkt_info *info, + size_t len, + bool zcopy) +{ + if (zcopy) + return __zerocopy_sg_from_iter(info->msg, NULL, skb, + &info->msg->msg_iter, + len); + + return memcpy_from_msg(skb_put(skb, len), info->msg, len); +} + +static void virtio_transport_init_hdr(struct sk_buff *skb, + struct virtio_vsock_pkt_info *info, + size_t payload_len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct virtio_vsock_hdr *hdr; + + hdr = virtio_vsock_hdr(skb); + hdr->type = cpu_to_le16(info->type); + hdr->op = cpu_to_le16(info->op); + hdr->src_cid = cpu_to_le64(src_cid); + hdr->dst_cid = cpu_to_le64(dst_cid); + hdr->src_port = cpu_to_le32(src_port); + hdr->dst_port = cpu_to_le32(dst_port); + hdr->flags = cpu_to_le32(info->flags); + hdr->len = cpu_to_le32(payload_len); } static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, @@ -214,6 +240,82 @@ static u16 virtio_transport_get_type(struct sock *sk) return VIRTIO_VSOCK_TYPE_SEQPACKET; } +/* Returns new sk_buff on success, otherwise returns NULL. */ +static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, + size_t payload_len, + bool zcopy, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct vsock_sock *vsk; + struct sk_buff *skb; + size_t skb_len; + + skb_len = VIRTIO_VSOCK_SKB_HEADROOM; + + if (!zcopy) + skb_len += payload_len; + + skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); + if (!skb) + return NULL; + + virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port, + dst_cid, dst_port); + + vsk = info->vsk; + + /* If 'vsk' != NULL then payload is always present, so we + * will never call '__zerocopy_sg_from_iter()' below without + * setting skb owner in 'skb_set_owner_w()'. 
The only case + * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message + * without payload. + */ + WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy); + + /* Set owner here, because '__zerocopy_sg_from_iter()' uses + * owner of skb without check to update 'sk_wmem_alloc'. + */ + if (vsk) + skb_set_owner_w(skb, sk_vsock(vsk)); + + if (info->msg && payload_len > 0) { + int err; + + err = virtio_transport_fill_skb(skb, info, payload_len, zcopy); + if (err) + goto out; + + if (msg_data_left(info->msg) == 0 && + info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { + struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + + hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + + if (info->msg->msg_flags & MSG_EOR) + hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + } + } + + if (info->reply) + virtio_vsock_skb_set_reply(skb); + + trace_virtio_transport_alloc_pkt(src_cid, src_port, + dst_cid, dst_port, + payload_len, + info->type, + info->op, + info->flags, + zcopy); + + return skb; +out: + kfree_skb(skb); + return NULL; +} + /* This function can only be used on connecting/connected sockets, * since a socket assigned to a transport is required. * @@ -222,10 +324,12 @@ static u16 virtio_transport_get_type(struct sock *sk) static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, struct virtio_vsock_pkt_info *info) { + u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; u32 src_cid, src_port, dst_cid, dst_port; const struct virtio_transport *t_ops; struct virtio_vsock_sock *vvs; u32 pkt_len = info->pkt_len; + bool can_zcopy = false; u32 rest_len; int ret; @@ -254,15 +358,30 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) return pkt_len; + if (info->msg) { + /* If zerocopy is not enabled by 'setsockopt()', we behave as + * there is no MSG_ZEROCOPY flag set. 
+ */ + if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) + info->msg->msg_flags &= ~MSG_ZEROCOPY; + + if (info->msg->msg_flags & MSG_ZEROCOPY) + can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len); + + if (can_zcopy) + max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, + (MAX_SKB_FRAGS * PAGE_SIZE)); + } + rest_len = pkt_len; do { struct sk_buff *skb; size_t skb_len; - skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, rest_len); + skb_len = min(max_skb_len, rest_len); - skb = virtio_transport_alloc_skb(info, skb_len, + skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy, src_cid, src_port, dst_cid, dst_port); if (!skb) { @@ -270,6 +389,21 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, break; } + /* We process buffer part by part, allocating skb on + * each iteration. If this is last skb for this buffer + * and MSG_ZEROCOPY mode is in use - we must allocate + * completion for the current syscall. + */ + if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && + skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { + if (virtio_transport_init_zcopy_skb(vsk, skb, + info->msg, + can_zcopy)) { + ret = -ENOMEM; + break; + } + } + virtio_transport_inc_tx_pkt(vvs, skb); ret = t_ops->send_pkt(skb); @@ -985,7 +1119,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!t) return -ENOTCONN; - reply = virtio_transport_alloc_skb(&info, 0, + reply = virtio_transport_alloc_skb(&info, 0, false, le64_to_cpu(hdr->dst_cid), le32_to_cpu(hdr->dst_port), le64_to_cpu(hdr->src_cid), -- cgit v1.2.3 From e9090e70e618cd62ab7bf2914511e5eea31a2535 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:21 +0200 Subject: firmware: arm_scmi: Extend perf protocol ops to get number of domains Similar to other protocol ops, it's useful for an scmi module driver to get the number of supported performance domains, hence let's make this available by adding a new perf protocol callback. 
Note that a user is being added by subsequent changes. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-2-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/perf.c | 8 ++++++++ include/linux/scmi_protocol.h | 2 ++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index c0cd556fbaae..9fb63dd44c20 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -423,6 +423,13 @@ scmi_perf_describe_levels_get(const struct scmi_protocol_handle *ph, return ret; } +static int scmi_perf_num_domains_get(const struct scmi_protocol_handle *ph) +{ + struct scmi_perf_info *pi = ph->get_priv(ph); + + return pi->num_domains; +} + static int scmi_perf_msg_limits_set(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf) { @@ -948,6 +955,7 @@ scmi_power_scale_get(const struct scmi_protocol_handle *ph) } static const struct scmi_perf_proto_ops perf_proto_ops = { + .num_domains_get = scmi_perf_num_domains_get, .limits_set = scmi_perf_limits_set, .limits_get = scmi_perf_limits_get, .level_set = scmi_perf_level_set, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index e09ac428fa1b..14791dd54706 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -110,6 +110,7 @@ struct scmi_clk_proto_ops { * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * + * @num_domains_get: gets the number of supported performance domains * @limits_set: sets limits on the performance level of a domain * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain @@ -129,6 +130,7 @@ struct scmi_clk_proto_ops { * or in some other (abstract) scale */ struct scmi_perf_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); int
(*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf); int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain, -- cgit v1.2.3 From 3d99ed60721bf2e108c8fc660775766057689a92 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:22 +0200 Subject: firmware: arm_scmi: Extend perf protocol ops to get information of a domain Similar to other protocol ops, it's useful for an scmi module driver to get some generic information of a performance domain. Therefore, let's add a new callback to provide this information. The information is currently limited to the name of the performance domain and whether the set-level operation is supported, although this can easily be extended if we find the need for it. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-3-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/perf.c | 47 +++++++++++++++++++++++++--------------- include/linux/scmi_protocol.h | 8 +++++++ 2 files changed, 38 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 9fb63dd44c20..d20bb6b8abfa 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -145,7 +145,6 @@ struct scmi_msg_resp_perf_describe_levels_v4 { struct perf_dom_info { u32 id; bool set_limits; - bool set_perf; bool perf_limit_notify; bool perf_level_notify; bool perf_fastchannels; @@ -154,7 +153,7 @@ struct perf_dom_info { u32 sustained_freq_khz; u32 sustained_perf_level; u32 mult_factor; - char name[SCMI_MAX_STR_SIZE]; + struct scmi_perf_domain_info info; struct scmi_opp opp[MAX_OPPS]; struct scmi_fc_info *fc_info; struct xarray opps_by_idx; @@ -257,7 +256,7 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, flags = le32_to_cpu(attr->flags); dom_info->set_limits = SUPPORTS_SET_LIMITS(flags); - dom_info->set_perf = 
SUPPORTS_SET_PERF_LVL(flags); + dom_info->info.set_perf = SUPPORTS_SET_PERF_LVL(flags); dom_info->perf_limit_notify = SUPPORTS_PERF_LIMIT_NOTIFY(flags); dom_info->perf_level_notify = SUPPORTS_PERF_LEVEL_NOTIFY(flags); dom_info->perf_fastchannels = SUPPORTS_PERF_FASTCHANNELS(flags); @@ -276,7 +275,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->mult_factor = (dom_info->sustained_freq_khz * 1000) / dom_info->sustained_perf_level; - strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); + strscpy(dom_info->info.name, attr->name, + SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); @@ -288,7 +288,7 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, if (!ret && PROTOCOL_REV_MAJOR(version) >= 0x3 && SUPPORTS_EXTENDED_NAMES(flags)) ph->hops->extended_name_get(ph, PERF_DOMAIN_NAME_GET, - dom_info->id, dom_info->name, + dom_info->id, dom_info->info.name, SCMI_MAX_STR_SIZE); if (dom_info->level_indexing_mode) { @@ -430,6 +430,29 @@ static int scmi_perf_num_domains_get(const struct scmi_protocol_handle *ph) return pi->num_domains; } +static inline struct perf_dom_info * +scmi_perf_domain_lookup(const struct scmi_protocol_handle *ph, u32 domain) +{ + struct scmi_perf_info *pi = ph->get_priv(ph); + + if (domain >= pi->num_domains) + return ERR_PTR(-EINVAL); + + return pi->dom_info + domain; +} + +static const struct scmi_perf_domain_info * +scmi_perf_info_get(const struct scmi_protocol_handle *ph, u32 domain) +{ + struct perf_dom_info *dom; + + dom = scmi_perf_domain_lookup(ph, domain); + if (IS_ERR(dom)) + return ERR_PTR(-EINVAL); + + return &dom->info; +} + static int scmi_perf_msg_limits_set(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf) { @@ -453,17 +476,6 @@ static int scmi_perf_msg_limits_set(const struct scmi_protocol_handle *ph, return ret; } -static inline struct perf_dom_info * -scmi_perf_domain_lookup(const struct scmi_protocol_handle *ph, u32 domain) -{ - struct 
scmi_perf_info *pi = ph->get_priv(ph); - - if (domain >= pi->num_domains) - return ERR_PTR(-EINVAL); - - return pi->dom_info + domain; -} - static int __scmi_perf_limits_set(const struct scmi_protocol_handle *ph, struct perf_dom_info *dom, u32 max_perf, u32 min_perf) @@ -819,7 +831,7 @@ static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, } dev_dbg(dev, "[%d][%s]:: Registered OPP[%d] %lu\n", - domain, dom->name, idx, freq); + domain, dom->info.name, idx, freq); } return 0; } @@ -956,6 +968,7 @@ scmi_power_scale_get(const struct scmi_protocol_handle *ph) static const struct scmi_perf_proto_ops perf_proto_ops = { .num_domains_get = scmi_perf_num_domains_get, + .info_get = scmi_perf_info_get, .limits_set = scmi_perf_limits_set, .limits_get = scmi_perf_limits_get, .level_set = scmi_perf_level_set, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 14791dd54706..9108fe8ed3e3 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -106,11 +106,17 @@ struct scmi_clk_proto_ops { u8 oem_type, u32 oem_val, bool atomic); }; +struct scmi_perf_domain_info { + char name[SCMI_MAX_STR_SIZE]; + bool set_perf; +}; + /** * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * * @num_domains_get: gets the number of supported performance domains + * @info_get: get the information of a performance domain * @limits_set: sets limits on the performance level of a domain * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain @@ -131,6 +137,8 @@ struct scmi_clk_proto_ops { */ struct scmi_perf_proto_ops { int (*num_domains_get)(const struct scmi_protocol_handle *ph); + const struct scmi_perf_domain_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 domain); int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf); int (*limits_get)(const 
struct scmi_protocol_handle *ph, u32 domain, -- cgit v1.2.3 From 39dfa5b9e1f0fa63b811a0a87f1c2fb9c76a0456 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:24 +0200 Subject: firmware: arm_scmi: Align perf ops to use domain-id as in-parameter Most scmi_perf_proto_ops are already using an "u32 domain" as an in-parameter to indicate what performance domain we shall operate upon. However, some of the ops are using a "struct device *dev", which means that an additional OF parsing is needed each time the perf ops gets called, to find the corresponding domain-id. To avoid the above, but also to make the code more consistent, let's replace the in-parameter "struct device *dev" with an "u32 domain". Note that, this requires us to make some corresponding changes to the scmi cpufreq driver, so let's do that too. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20230825112633.236607-5-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- drivers/cpufreq/scmi-cpufreq.c | 14 +++++++++----- drivers/firmware/arm_scmi/perf.c | 22 ++++------------------ include/linux/scmi_protocol.h | 6 +++--- 3 files changed, 16 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 7d05d48c0337..125e8a8421fb 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -137,7 +137,7 @@ scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power, static int scmi_cpufreq_init(struct cpufreq_policy *policy) { - int ret, nr_opp; + int ret, nr_opp, domain; unsigned int latency; struct device *cpu_dev; struct scmi_data *priv; @@ -149,6 +149,10 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) return -ENODEV; } + domain = scmi_cpu_domain_id(cpu_dev); + if (domain < 0) + return domain; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; @@ -187,7 +191,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy 
*policy) */ nr_opp = dev_pm_opp_get_opp_count(cpu_dev); if (nr_opp <= 0) { - ret = perf_ops->device_opps_add(ph, cpu_dev); + ret = perf_ops->device_opps_add(ph, cpu_dev, domain); if (ret) { dev_warn(cpu_dev, "failed to add opps to the device\n"); goto out_free_cpumask; @@ -220,7 +224,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) } priv->cpu_dev = cpu_dev; - priv->domain_id = scmi_cpu_domain_id(cpu_dev); + priv->domain_id = domain; policy->driver_data = priv; policy->freq_table = freq_table; @@ -228,14 +232,14 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) /* SCMI allows DVFS request for any domain from any CPU */ policy->dvfs_possible_from_any_cpu = true; - latency = perf_ops->transition_latency_get(ph, cpu_dev); + latency = perf_ops->transition_latency_get(ph, domain); if (!latency) latency = CPUFREQ_ETERNAL; policy->cpuinfo.transition_latency = latency; policy->fast_switch_possible = - perf_ops->fast_switch_possible(ph, cpu_dev); + perf_ops->fast_switch_possible(ph, domain); return 0; diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index d20bb6b8abfa..092b51cf9596 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -795,17 +795,13 @@ static int scmi_dev_domain_id(struct device *dev) } static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, - struct device *dev) + struct device *dev, u32 domain) { - int idx, ret, domain; + int idx, ret; unsigned long freq; struct scmi_opp *opp; struct perf_dom_info *dom; - domain = scmi_dev_domain_id(dev); - if (domain < 0) - return -EINVAL; - dom = scmi_perf_domain_lookup(ph, domain); if (IS_ERR(dom)) return PTR_ERR(dom); @@ -838,15 +834,10 @@ static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, static int scmi_dvfs_transition_latency_get(const struct scmi_protocol_handle *ph, - struct device *dev) + u32 domain) { - int domain; struct perf_dom_info *dom; - domain = 
scmi_dev_domain_id(dev); - if (domain < 0) - return -EINVAL; - dom = scmi_perf_domain_lookup(ph, domain); if (IS_ERR(dom)) return PTR_ERR(dom); @@ -942,15 +933,10 @@ static int scmi_dvfs_est_power_get(const struct scmi_protocol_handle *ph, } static bool scmi_fast_switch_possible(const struct scmi_protocol_handle *ph, - struct device *dev) + u32 domain) { - int domain; struct perf_dom_info *dom; - domain = scmi_dev_domain_id(dev); - if (domain < 0) - return false; - dom = scmi_perf_domain_lookup(ph, domain); if (IS_ERR(dom)) return false; diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 9108fe8ed3e3..4e63766adc6f 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -149,9 +149,9 @@ struct scmi_perf_proto_ops { u32 *level, bool poll); int (*device_domain_id)(struct device *dev); int (*transition_latency_get)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); int (*device_opps_add)(const struct scmi_protocol_handle *ph, - struct device *dev); + struct device *dev, u32 domain); int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long rate, bool poll); int (*freq_get)(const struct scmi_protocol_handle *ph, u32 domain, @@ -159,7 +159,7 @@ struct scmi_perf_proto_ops { int (*est_power_get)(const struct scmi_protocol_handle *ph, u32 domain, unsigned long *rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, - struct device *dev); + u32 domain); enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); }; -- cgit v1.2.3 From 9b578d83629e13f81a53d1695a4f700cdb10f772 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:25 +0200 Subject: firmware: arm_scmi: Drop redundant ->device_domain_id() from perf ops There are no longer any users of the ->device_domain_id() ops in the scmi_perf_proto_ops, therefore let's remove it. 
Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-6-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/perf.c | 13 ------------- include/linux/scmi_protocol.h | 2 -- 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 092b51cf9596..9eb58df9124d 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -782,18 +782,6 @@ static void scmi_perf_domain_init_fc(const struct scmi_protocol_handle *ph, *p_fc = fc; } -/* Device specific ops */ -static int scmi_dev_domain_id(struct device *dev) -{ - struct of_phandle_args clkspec; - - if (of_parse_phandle_with_args(dev->of_node, "clocks", "#clock-cells", - 0, &clkspec)) - return -EINVAL; - - return clkspec.args[0]; -} - static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, struct device *dev, u32 domain) { @@ -959,7 +947,6 @@ static const struct scmi_perf_proto_ops perf_proto_ops = { .limits_get = scmi_perf_limits_get, .level_set = scmi_perf_level_set, .level_get = scmi_perf_level_get, - .device_domain_id = scmi_dev_domain_id, .transition_latency_get = scmi_dvfs_transition_latency_get, .device_opps_add = scmi_dvfs_device_opps_add, .freq_set = scmi_dvfs_freq_set, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 4e63766adc6f..27bfa5a65b45 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -121,7 +121,6 @@ struct scmi_perf_domain_info { * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain * @level_get: gets the performance level of a domain - * @device_domain_id: gets the scmi domain id for a given device * @transition_latency_get: gets the DVFS transition latency for a given device * @device_opps_add: adds all the OPPs for a given device * @freq_set: sets the frequency for a given device using 
sustained frequency @@ -147,7 +146,6 @@ struct scmi_perf_proto_ops { u32 level, bool poll); int (*level_get)(const struct scmi_protocol_handle *ph, u32 domain, u32 *level, bool poll); - int (*device_domain_id)(struct device *dev); int (*transition_latency_get)(const struct scmi_protocol_handle *ph, u32 domain); int (*device_opps_add)(const struct scmi_protocol_handle *ph, -- cgit v1.2.3 From 3dd91515ef43dd43e32e2a84e4bd881b64fb33ae Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:32 +0200 Subject: PM: domains: Allow genpd providers to manage OPP tables directly by its FW In some cases the OPP tables aren't specified in device tree, but rather encoded in the FW. To allow a genpd provider to specify them dynamically instead, let's add a new genpd flag, GENPD_FLAG_OPP_TABLE_FW. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-13-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla --- drivers/base/power/domain.c | 11 ++++++----- include/linux/pm_domain.h | 5 +++++ 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 5cb2023581d4..c74edf80417f 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -130,6 +130,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) #define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) #define genpd_is_rpm_always_on(genpd) (genpd->flags & GENPD_FLAG_RPM_ALWAYS_ON) +#define genpd_is_opp_table_fw(genpd) (genpd->flags & GENPD_FLAG_OPP_TABLE_FW) static inline bool irq_safe_dev_in_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) @@ -2328,7 +2329,7 @@ int of_genpd_add_provider_simple(struct device_node *np, genpd->dev.of_node = np; /* Parse genpd OPP table */ - if (genpd->set_performance_state) { + if (!genpd_is_opp_table_fw(genpd) && 
genpd->set_performance_state) { ret = dev_pm_opp_of_add_table(&genpd->dev); if (ret) return dev_err_probe(&genpd->dev, ret, "Failed to add OPP table\n"); @@ -2343,7 +2344,7 @@ int of_genpd_add_provider_simple(struct device_node *np, ret = genpd_add_provider(np, genpd_xlate_simple, genpd); if (ret) { - if (genpd->set_performance_state) { + if (!genpd_is_opp_table_fw(genpd) && genpd->set_performance_state) { dev_pm_opp_put_opp_table(genpd->opp_table); dev_pm_opp_of_remove_table(&genpd->dev); } @@ -2387,7 +2388,7 @@ int of_genpd_add_provider_onecell(struct device_node *np, genpd->dev.of_node = np; /* Parse genpd OPP table */ - if (genpd->set_performance_state) { + if (!genpd_is_opp_table_fw(genpd) && genpd->set_performance_state) { ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i); if (ret) { dev_err_probe(&genpd->dev, ret, @@ -2423,7 +2424,7 @@ error: genpd->provider = NULL; genpd->has_provider = false; - if (genpd->set_performance_state) { + if (!genpd_is_opp_table_fw(genpd) && genpd->set_performance_state) { dev_pm_opp_put_opp_table(genpd->opp_table); dev_pm_opp_of_remove_table(&genpd->dev); } @@ -2455,7 +2456,7 @@ void of_genpd_del_provider(struct device_node *np) if (gpd->provider == &np->fwnode) { gpd->has_provider = false; - if (!gpd->set_performance_state) + if (genpd_is_opp_table_fw(gpd) || !gpd->set_performance_state) continue; dev_pm_opp_put_opp_table(gpd->opp_table); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f776fb93eaa0..05ad8cefdff1 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -61,6 +61,10 @@ * GENPD_FLAG_MIN_RESIDENCY: Enable the genpd governor to consider its * components' next wakeup when determining the * optimal idle state. + * + * GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states, + * but its corresponding OPP tables are not + * described in DT, but are given directly by FW. 
*/ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -69,6 +73,7 @@ #define GENPD_FLAG_CPU_DOMAIN (1U << 4) #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) #define GENPD_FLAG_MIN_RESIDENCY (1U << 6) +#define GENPD_FLAG_OPP_TABLE_FW (1U << 7) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ -- cgit v1.2.3 From 9f6c532f59b20580acf8ede9409c9b8dce6e74e1 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:10 +0200 Subject: futex: Add sys_futex_wake() To complement sys_futex_waitv() add sys_futex_wake(). This syscall implements what was previously known as FUTEX_WAKE_BITSET except it uses 'unsigned long' for the bitmask and takes FUTEX2 flags. The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105247.936205525@noisy.programming.kicks-ass.net --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/syscalls.h | 3 +++ include/uapi/asm-generic/unistd.h | 4 +++- kernel/futex/syscalls.c | 30 +++++++++++++++++++++++++++++ kernel/sys_ni.c | 1 + 22 files changed, 56 insertions(+), 2 deletions(-) (limited to 'include/linux') 
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index ad37569d0507..3b86519d68e4 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -492,3 +492,4 @@ 560 common set_mempolicy_home_node sys_ni_syscall 561 common cachestat sys_cachestat 562 common fchmodat2 sys_fchmodat2 +563 common futex_wake sys_futex_wake diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c572d6c3dee0..714abeb1e6fa 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -466,3 +466,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index bd77253b62e0..63a8a9c4abc1 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 453 +#define __NR_compat_syscalls 455 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 78b68311ec81..68974683737b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -911,6 +911,8 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) __SYSCALL(__NR_cachestat, sys_cachestat) #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 83d8609aec03..cd50247508e6 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -373,3 +373,4 @@ 450 common 
set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 259ceb125367..21eb35c693e1 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -452,3 +452,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a3798c2637fd..3a4e8513a8e1 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -458,3 +458,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 152034b8e0a0..6883ea3b830d 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -391,3 +391,4 @@ 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 n32 cachestat sys_cachestat 452 n32 fchmodat2 sys_fchmodat2 +454 n32 futex_wake sys_futex_wake diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index cb5e757f6621..48bc0fb4e3dc 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -367,3 +367,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 +454 n64 futex_wake sys_futex_wake diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 1a646813afdc..a92625f5bad8 100644 --- 
a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -440,3 +440,4 @@ 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 o32 cachestat sys_cachestat 452 o32 fchmodat2 sys_fchmodat2 +454 o32 futex_wake sys_futex_wake diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index e97c175b56f9..57faa9786ffe 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 20e50586e8a2..e6c6ed6b30ee 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -539,3 +539,4 @@ 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 0122cc156952..754720154dc1 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -455,3 +455,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake sys_futex_wake diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index e90d585c4d3e..902a997e7ec6 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -455,3 +455,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff 
--git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4ed06c71c43f..8a1f887c8be6 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -498,3 +498,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 2d0b1bd866ea..9e81323979b0 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -457,3 +457,4 @@ 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node 451 i386 cachestat sys_cachestat 452 i386 fchmodat2 sys_fchmodat2 +454 i386 futex_wake sys_futex_wake diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 1d6eee30eceb..d10a6003a7c9 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -375,6 +375,7 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index fc1a4f3c81d9..4e511bfd4b8f 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -423,3 +423,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22bc6bc147f8..e174ed86da1d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -549,6 +549,9 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, asmlinkage long 
sys_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); + +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index abe087c53b4b..f5454e6f4c6f 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -822,9 +822,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat) #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) #undef __NR_syscalls -#define __NR_syscalls 453 +#define __NR_syscalls 455 /* * 32 bit systems traditionally used different diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 2339f9ccee7f..7049a52ef68e 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -306,6 +306,36 @@ destroy_timer: return ret; } +/* + * sys_futex_wake - Wake a number of futexes + * @uaddr: Address of the futex(es) to wake + * @mask: bitmask + * @nr: Number of the futexes to wake + * @flags: FUTEX2 flags + * + * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the + * futex2 family of calls. 
+ */ + +SYSCALL_DEFINE4(futex_wake, + void __user *, uaddr, + unsigned long, mask, + int, nr, + unsigned int, flags) +{ + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + flags = futex2_to_flags(flags); + if (!futex_flags_valid(flags)) + return -EINVAL; + + if (!futex_validate_input(flags, mask)) + return -EINVAL; + + return futex_wake(uaddr, flags, nr, mask); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e137c1385c56..983c0583c627 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -87,6 +87,7 @@ COND_SYSCALL_COMPAT(set_robust_list); COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); +COND_SYSCALL(futex_wake); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module); -- cgit v1.2.3 From cb8c4312afca1b2dc64107e7e7cea81911055612 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:12 +0200 Subject: futex: Add sys_futex_wait() To complement sys_futex_waitv()/wake(), add sys_futex_wait(). This syscall implements what was previously known as FUTEX_WAIT_BITSET except it uses 'unsigned long' for the value and bitmask arguments, takes timespec and clockid_t arguments for the absolute timeout and uses FUTEX2 flags. The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105248.164324363@noisy.programming.kicks-ass.net --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/syscalls.h | 4 + include/uapi/asm-generic/unistd.h | 4 +- kernel/futex/futex.h | 3 + kernel/futex/syscalls.c | 120 +++++++++++++++++++++------- kernel/futex/waitwake.c | 61 ++++++++------ kernel/sys_ni.c | 1 + 24 files changed, 156 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 3b86519d68e4..c49f12fd264e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -493,3 +493,4 @@ 561 common cachestat sys_cachestat 562 common fchmodat2 sys_fchmodat2 563 common futex_wake sys_futex_wake +564 common futex_wait sys_futex_wait diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 714abeb1e6fa..a6cf56277327 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -467,3 +467,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common 
futex_wait sys_futex_wait diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 63a8a9c4abc1..f33190f17ebb 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 455 +#define __NR_compat_syscalls 456 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 68974683737b..6e7d37282ba1 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -913,6 +913,8 @@ __SYSCALL(__NR_cachestat, sys_cachestat) __SYSCALL(__NR_fchmodat2, sys_fchmodat2) #define __NR_futex_wake 454 __SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index cd50247508e6..4043f0c55170 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -374,3 +374,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 21eb35c693e1..24841674acc5 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 3a4e8513a8e1..f03927ab0220 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -459,3 
+459,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 6883ea3b830d..dbb5edfb667b 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -392,3 +392,4 @@ 451 n32 cachestat sys_cachestat 452 n32 fchmodat2 sys_fchmodat2 454 n32 futex_wake sys_futex_wake +455 n32 futex_wait sys_futex_wait diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 48bc0fb4e3dc..faff8dfd2983 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -368,3 +368,4 @@ 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 454 n64 futex_wake sys_futex_wake +455 n64 futex_wait sys_futex_wait diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index a92625f5bad8..542f75605b3e 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -441,3 +441,4 @@ 451 o32 cachestat sys_cachestat 452 o32 fchmodat2 sys_fchmodat2 454 o32 futex_wake sys_futex_wake +455 o32 futex_wait sys_futex_wait diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 57faa9786ffe..8e50e89551f7 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -452,3 +452,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index e6c6ed6b30ee..ad33a9993a6a 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -540,3 +540,4 @@ 451 common cachestat sys_cachestat 
452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 754720154dc1..418853fd2a6b 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait sys_futex_wait diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 902a997e7ec6..8ef9557d2779 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 8a1f887c8be6..df59a9d5f109 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -499,3 +499,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 9e81323979b0..0f6616822bd5 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -458,3 +458,4 @@ 451 i386 cachestat sys_cachestat 452 i386 fchmodat2 sys_fchmodat2 454 i386 futex_wake sys_futex_wake +455 i386 futex_wait sys_futex_wait diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index d10a6003a7c9..ddf6288823ad 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -376,6 +376,7 @@ 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack 
sys_map_shadow_stack 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 4e511bfd4b8f..ac278dbce2ee 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -424,3 +424,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e174ed86da1d..11f3fdd1ee03 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -552,6 +552,10 @@ asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask, + unsigned int flags, struct __kernel_timespec __user *timespec, + clockid_t clockid); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index f5454e6f4c6f..f6553bd5d213 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -824,9 +824,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat) __SYSCALL(__NR_fchmodat2, sys_fchmodat2) #define __NR_futex_wake 454 __SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) #undef __NR_syscalls -#define __NR_syscalls 455 +#define __NR_syscalls 456 /* * 32 bit systems traditionally used different diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 0e7821a944a2..e74888a7d71d 100644 --- a/kernel/futex/futex.h +++ 
b/kernel/futex/futex.h @@ -332,6 +332,9 @@ extern int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi); +extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + struct hrtimer_sleeper *to, u32 bitset); + extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset); diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 47398926765e..e4c8ec713787 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -221,6 +221,46 @@ static int futex_parse_waitv(struct futex_vector *futexv, return 0; } +static int futex2_setup_timeout(struct __kernel_timespec __user *timeout, + clockid_t clockid, struct hrtimer_sleeper *to) +{ + int flag_clkid = 0, flag_init = 0; + struct timespec64 ts; + ktime_t time; + int ret; + + if (!timeout) + return 0; + + if (clockid == CLOCK_REALTIME) { + flag_clkid = FLAGS_CLOCKRT; + flag_init = FUTEX_CLOCK_REALTIME; + } + + if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) + return -EINVAL; + + if (get_timespec64(&ts, timeout)) + return -EFAULT; + + /* + * Since there's no opcode for futex_waitv, use + * FUTEX_WAIT_BITSET that uses absolute timeout as well + */ + ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); + if (ret) + return ret; + + futex_setup_timer(&time, to, flag_clkid, 0); + return 0; +} + +static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to) +{ + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); +} + /** * sys_futex_waitv - Wait on a list of futexes * @waiters: List of futexes to wait on @@ -250,8 +290,6 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, { struct hrtimer_sleeper to; struct futex_vector *futexv; - struct timespec64 ts; - ktime_t time; int ret; /* This syscall supports no flags for now */ @@ -261,30 +299,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, if 
(!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) return -EINVAL; - if (timeout) { - int flag_clkid = 0, flag_init = 0; - - if (clockid == CLOCK_REALTIME) { - flag_clkid = FLAGS_CLOCKRT; - flag_init = FUTEX_CLOCK_REALTIME; - } - - if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) - return -EINVAL; - - if (get_timespec64(&ts, timeout)) - return -EFAULT; - - /* - * Since there's no opcode for futex_waitv, use - * FUTEX_WAIT_BITSET that uses absolute timeout as well - */ - ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); - if (ret) - return ret; - - futex_setup_timer(&time, &to, flag_clkid, 0); - } + if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) + return ret; futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); if (!futexv) { @@ -299,10 +315,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, kfree(futexv); destroy_timer: - if (timeout) { - hrtimer_cancel(&to.timer); - destroy_hrtimer_on_stack(&to.timer); - } + if (timeout) + futex2_destroy_timeout(&to); return ret; } @@ -336,6 +350,52 @@ SYSCALL_DEFINE4(futex_wake, return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask); } +/* + * sys_futex_wait - Wait on a futex + * @uaddr: Address of the futex to wait on + * @val: Value of @uaddr + * @mask: bitmask + * @flags: FUTEX2 flags + * @timeout: Optional absolute timeout + * @clockid: Clock to be used for the timeout, realtime or monotonic + * + * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the + * futex2 family of calls.
+ */ + +SYSCALL_DEFINE6(futex_wait, + void __user *, uaddr, + unsigned long, val, + unsigned long, mask, + unsigned int, flags, + struct __kernel_timespec __user *, timeout, + clockid_t, clockid) +{ + struct hrtimer_sleeper to; + int ret; + + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + flags = futex2_to_flags(flags); + if (!futex_flags_valid(flags)) + return -EINVAL; + + if (!futex_validate_input(flags, val) || + !futex_validate_input(flags, mask)) + return -EINVAL; + + if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) + return ret; + + ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask); + + if (timeout) + futex2_destroy_timeout(&to); + + return ret; +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index ceb05b876597..b109a0810a2c 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -632,20 +632,18 @@ retry_private: return ret; } -int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) +int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + struct hrtimer_sleeper *to, u32 bitset) { - struct hrtimer_sleeper timeout, *to; - struct restart_block *restart; - struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; + struct futex_hash_bucket *hb; int ret; if (!bitset) return -EINVAL; + q.bitset = bitset; - to = futex_setup_timer(abs_time, &timeout, flags, - current->timer_slack_ns); retry: /* * Prepare to wait on uaddr. On success, it holds hb->lock and q @@ -653,18 +651,17 @@ retry: */ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) - goto out; + return ret; /* futex_queue and wait for wakeup, timeout, or a signal. */ futex_wait_queue(hb, &q, to); /* If we were woken (and unqueued), we succeeded, whatever. 
*/ - ret = 0; if (!futex_unqueue(&q)) - goto out; - ret = -ETIMEDOUT; + return 0; + if (to && !to->task) - goto out; + return -ETIMEDOUT; /* * We expect signal_pending(current), but we might be the @@ -673,24 +670,38 @@ retry: if (!signal_pending(current)) goto retry; - ret = -ERESTARTSYS; - if (!abs_time) - goto out; + return -ERESTARTSYS; +} - restart = &current->restart_block; - restart->futex.uaddr = uaddr; - restart->futex.val = val; - restart->futex.time = *abs_time; - restart->futex.bitset = bitset; - restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; +int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) +{ + struct hrtimer_sleeper timeout, *to; + struct restart_block *restart; + int ret; + + to = futex_setup_timer(abs_time, &timeout, flags, + current->timer_slack_ns); + + ret = __futex_wait(uaddr, flags, val, to, bitset); + + /* No timeout, nothing to clean up. */ + if (!to) + return ret; + + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); - ret = set_restart_fn(restart, futex_wait_restart); + if (ret == -ERESTARTSYS) { + restart = &current->restart_block; + restart->futex.uaddr = uaddr; + restart->futex.val = val; + restart->futex.time = *abs_time; + restart->futex.bitset = bitset; + restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; -out: - if (to) { - hrtimer_cancel(&to->timer); - destroy_hrtimer_on_stack(&to->timer); + return set_restart_fn(restart, futex_wait_restart); } + return ret; } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 983c0583c627..13df391194e2 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -88,6 +88,7 @@ COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); COND_SYSCALL(futex_wake); +COND_SYSCALL(futex_wait); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module); -- cgit v1.2.3 From 0f4b5f972216782a4acb1ae00dcb55173847c2ff Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023
12:45:15 +0200 Subject: futex: Add sys_futex_requeue() Finish off the 'simple' futex2 syscall group by adding sys_futex_requeue(). Unlike sys_futex_{wait,wake}() its arguments are too numerous to fit into a regular syscall. As such, use struct futex_waitv to pass the 'source' and 'destination' futexes to the syscall. This syscall implements what was previously known as FUTEX_CMP_REQUEUE and uses {val, uaddr, flags} for source and {uaddr, flags} for destination. This design explicitly allows requeueing between different types of futex by having a different flags word per uaddr. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105248.511860556@noisy.programming.kicks-ass.net --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/syscalls.h | 3 +++ include/uapi/asm-generic/unistd.h | 4 ++- kernel/futex/syscalls.c | 38 +++++++++++++++++++++++++++++ kernel/sys_ni.c | 1 + 22 files changed, 64 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index c49f12fd264e..b1865f9bb31e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ 
b/arch/alpha/kernel/syscalls/syscall.tbl @@ -494,3 +494,4 @@ 562 common fchmodat2 sys_fchmodat2 563 common futex_wake sys_futex_wake 564 common futex_wait sys_futex_wait +565 common futex_requeue sys_futex_requeue diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index a6cf56277327..93d0d46cbb15 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -468,3 +468,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index f33190f17ebb..531effca5f1f 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 456 +#define __NR_compat_syscalls 457 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 6e7d37282ba1..c453291154fd 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -915,6 +915,8 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2) __SYSCALL(__NR_futex_wake, sys_futex_wake) #define __NR_futex_wait 455 __SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 4043f0c55170..81375ea78288 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -375,3 +375,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/m68k/kernel/syscalls/syscall.tbl 
b/arch/m68k/kernel/syscalls/syscall.tbl index 24841674acc5..f7f997a88bab 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -454,3 +454,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index f03927ab0220..2967ec26b978 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -460,3 +460,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index dbb5edfb667b..383abb1713f4 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -393,3 +393,4 @@ 452 n32 fchmodat2 sys_fchmodat2 454 n32 futex_wake sys_futex_wake 455 n32 futex_wait sys_futex_wait +456 n32 futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index faff8dfd2983..c9bd09ba905f 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -369,3 +369,4 @@ 452 n64 fchmodat2 sys_fchmodat2 454 n64 futex_wake sys_futex_wake 455 n64 futex_wait sys_futex_wait +456 n64 futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 542f75605b3e..ba5ef6cea97a 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -442,3 +442,4 @@ 452 o32 fchmodat2 sys_fchmodat2 454 o32 futex_wake sys_futex_wake 455 o32 futex_wait sys_futex_wait +456 o32 futex_requeue sys_futex_requeue diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
b/arch/parisc/kernel/syscalls/syscall.tbl index 8e50e89551f7..9f0f6df55361 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index ad33a9993a6a..26fc41904266 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -541,3 +541,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 418853fd2a6b..31be90b241f7 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -457,3 +457,4 @@ 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue sys_futex_requeue diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 8ef9557d2779..4bc5d488ab17 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -457,3 +457,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index df59a9d5f109..8404c8e50394 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -500,3 +500,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git 
a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 0f6616822bd5..31c48bc2c3d8 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -459,3 +459,4 @@ 452 i386 fchmodat2 sys_fchmodat2 454 i386 futex_wake sys_futex_wake 455 i386 futex_wait sys_futex_wait +456 i386 futex_requeue sys_futex_requeue diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ddf6288823ad..a577bb27c16d 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -377,6 +377,7 @@ 453 64 map_shadow_stack sys_map_shadow_stack 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index ac278dbce2ee..dd71ecce8b86 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -425,3 +425,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 11f3fdd1ee03..0901af60d971 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -556,6 +556,9 @@ asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned l unsigned int flags, struct __kernel_timespec __user *timespec, clockid_t clockid); +asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters, + unsigned int flags, int nr_wake, int nr_requeue); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h 
b/include/uapi/asm-generic/unistd.h index f6553bd5d213..d9e9cd13e577 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -826,9 +826,11 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2) __SYSCALL(__NR_futex_wake, sys_futex_wake) #define __NR_futex_wait 455 __SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) #undef __NR_syscalls -#define __NR_syscalls 456 +#define __NR_syscalls 457 /* * 32 bit systems traditionally used different diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index dde9b74db9af..8200d86d30e1 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -396,6 +396,44 @@ SYSCALL_DEFINE6(futex_wait, return ret; } +/* + * sys_futex_requeue - Requeue a waiter from one futex to another + * @waiters: array describing the source and destination futex + * @flags: unused + * @nr_wake: number of futexes to wake + * @nr_requeue: number of futexes to requeue + * + * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the + * futex2 family of calls. 
+ */ + +SYSCALL_DEFINE4(futex_requeue, + struct futex_waitv __user *, waiters, + unsigned int, flags, + int, nr_wake, + int, nr_requeue) +{ + struct futex_vector futexes[2]; + u32 cmpval; + int ret; + + if (flags) + return -EINVAL; + + if (!waiters) + return -EINVAL; + + ret = futex_parse_waitv(futexes, waiters, 2); + if (ret) + return ret; + + cmpval = futexes[0].w.val; + + return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, + u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags, + nr_wake, nr_requeue, &cmpval, 0); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 13df391194e2..9db51ea373b0 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -89,6 +89,7 @@ COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); COND_SYSCALL(futex_wake); COND_SYSCALL(futex_wait); +COND_SYSCALL(futex_requeue); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module); -- cgit v1.2.3 From f31ecf671ddc498f20219453395794ff2383e06b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Jul 2023 16:14:37 -0600 Subject: io_uring: add IORING_OP_WAITID support This adds support for an async version of waitid(2), in a fully async version. If an event isn't immediately available, wait for a callback to trigger a retry. The format of the sqe is as follows: sqe->len The 'which', the idtype being queried/waited for. sqe->fd The 'pid' (or id) being waited for. sqe->file_index The 'options' being set. sqe->addr2 A pointer to siginfo_t, if any, being filled in. buf_index, addr3, and waitid_flags are reserved/unused for now. waitid_flags will be used for options for this request type. One interesting use case may be to add multi-shot support, so that the request stays armed and posts a notification every time a monitored process state change occurs. Note that this does not support rusage, on Arnd's recommendation.
See the waitid(2) man page for details on the arguments. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 + include/uapi/linux/io_uring.h | 2 + io_uring/Makefile | 3 +- io_uring/cancel.c | 5 + io_uring/io_uring.c | 3 + io_uring/opdef.c | 9 + io_uring/waitid.c | 372 +++++++++++++++++++++++++++++++++++++++++ io_uring/waitid.h | 15 ++ 8 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 io_uring/waitid.c create mode 100644 io_uring/waitid.h (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 13d19b9be9f4..fe1c5d4ec56c 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -313,6 +313,8 @@ struct io_ring_ctx { struct list_head cq_overflow_list; struct io_hash_table cancel_table; + struct hlist_head waitid_list; + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index d127948b0d8a..683ac2b74721 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -65,6 +65,7 @@ struct io_uring_sqe { __u32 xattr_flags; __u32 msg_ring_flags; __u32 uring_cmd_flags; + __u32 waitid_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -241,6 +242,7 @@ enum io_uring_op { IORING_OP_SEND_ZC, IORING_OP_SENDMSG_ZC, IORING_OP_READ_MULTISHOT, + IORING_OP_WAITID, /* this goes last, obviously */ IORING_OP_LAST, diff --git a/io_uring/Makefile b/io_uring/Makefile index 8cc8e5387a75..7bd64e442567 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \ openclose.o uring_cmd.o epoll.o \ statx.o net.o msg_ring.o timeout.o \ sqpoll.o fdinfo.o tctx.o poll.o \ - cancel.o kbuf.o rsrc.o rw.o opdef.o notif.o + cancel.o kbuf.o rsrc.o rw.o opdef.o \ + notif.o 
waitid.o obj-$(CONFIG_IO_WQ) += io-wq.o diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 7b23607cf4af..eb77a51c5a79 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -15,6 +15,7 @@ #include "tctx.h" #include "poll.h" #include "timeout.h" +#include "waitid.h" #include "cancel.h" struct io_cancel { @@ -119,6 +120,10 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd, if (ret != -ENOENT) return ret; + ret = io_waitid_cancel(ctx, cd, issue_flags); + if (ret != -ENOENT) + return ret; + spin_lock(&ctx->completion_lock); if (!(cd->flags & IORING_ASYNC_CANCEL_FD)) ret = io_timeout_cancel(ctx, cd); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 783ed0fff71b..2dff4772bf14 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -92,6 +92,7 @@ #include "cancel.h" #include "net.h" #include "notif.h" +#include "waitid.h" #include "timeout.h" #include "poll.h" @@ -348,6 +349,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->tctx_list); ctx->submit_state.free_list.next = NULL; INIT_WQ_LIST(&ctx->locked_free_list); + INIT_HLIST_HEAD(&ctx->waitid_list); INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_WQ_LIST(&ctx->submit_state.compl_reqs); return ctx; @@ -3303,6 +3305,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, ret |= io_cancel_defer_files(ctx, task, cancel_all); mutex_lock(&ctx->uring_lock); ret |= io_poll_remove_all(ctx, task, cancel_all); + ret |= io_waitid_remove_all(ctx, task, cancel_all); mutex_unlock(&ctx->uring_lock); ret |= io_kill_timeouts(ctx, task, cancel_all); if (task) diff --git a/io_uring/opdef.c b/io_uring/opdef.c index a3fb1f9b3998..aadcbf7136b0 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -33,6 +33,7 @@ #include "poll.h" #include "cancel.h" #include "rw.h" +#include "waitid.h" static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags) { @@ -439,6 +440,10 @@ const struct 
io_issue_def io_issue_defs[] = { .prep = io_read_mshot_prep, .issue = io_read_mshot, }, + [IORING_OP_WAITID] = { + .prep = io_waitid_prep, + .issue = io_waitid, + }, }; const struct io_cold_def io_cold_defs[] = { @@ -661,6 +666,10 @@ const struct io_cold_def io_cold_defs[] = { [IORING_OP_READ_MULTISHOT] = { .name = "READ_MULTISHOT", }, + [IORING_OP_WAITID] = { + .name = "WAITID", + .async_size = sizeof(struct io_waitid_async), + }, }; const char *io_uring_get_opcode(u8 opcode) diff --git a/io_uring/waitid.c b/io_uring/waitid.c new file mode 100644 index 000000000000..6f851978606d --- /dev/null +++ b/io_uring/waitid.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Support for async notification of waitid + */ +#include +#include +#include +#include +#include +#include + +#include + +#include "io_uring.h" +#include "cancel.h" +#include "waitid.h" +#include "../kernel/exit.h" + +static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts); + +#define IO_WAITID_CANCEL_FLAG BIT(31) +#define IO_WAITID_REF_MASK GENMASK(30, 0) + +struct io_waitid { + struct file *file; + int which; + pid_t upid; + int options; + atomic_t refs; + struct wait_queue_head *head; + struct siginfo __user *infop; + struct waitid_info info; +}; + +static void io_waitid_free(struct io_kiocb *req) +{ + struct io_waitid_async *iwa = req->async_data; + + put_pid(iwa->wo.wo_pid); + kfree(req->async_data); + req->async_data = NULL; + req->flags &= ~REQ_F_ASYNC_DATA; +} + +#ifdef CONFIG_COMPAT +static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo) +{ + struct compat_siginfo __user *infop; + bool ret; + + infop = (struct compat_siginfo __user *) iw->infop; + + if (!user_write_access_begin(infop, sizeof(*infop))) + return false; + + unsafe_put_user(signo, &infop->si_signo, Efault); + unsafe_put_user(0, &infop->si_errno, Efault); + unsafe_put_user(iw->info.cause, &infop->si_code, Efault); + unsafe_put_user(iw->info.pid, &infop->si_pid, Efault); + 
unsafe_put_user(iw->info.uid, &infop->si_uid, Efault); + unsafe_put_user(iw->info.status, &infop->si_status, Efault); + ret = true; +done: + user_write_access_end(); + return ret; +Efault: + ret = false; + goto done; +} +#endif + +static bool io_waitid_copy_si(struct io_kiocb *req, int signo) +{ + struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + bool ret; + + if (!iw->infop) + return true; + +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + return io_waitid_compat_copy_si(iw, signo); +#endif + + if (!user_write_access_begin(iw->infop, sizeof(*iw->infop))) + return false; + + unsafe_put_user(signo, &iw->infop->si_signo, Efault); + unsafe_put_user(0, &iw->infop->si_errno, Efault); + unsafe_put_user(iw->info.cause, &iw->infop->si_code, Efault); + unsafe_put_user(iw->info.pid, &iw->infop->si_pid, Efault); + unsafe_put_user(iw->info.uid, &iw->infop->si_uid, Efault); + unsafe_put_user(iw->info.status, &iw->infop->si_status, Efault); + ret = true; +done: + user_write_access_end(); + return ret; +Efault: + ret = false; + goto done; +} + +static int io_waitid_finish(struct io_kiocb *req, int ret) +{ + int signo = 0; + + if (ret > 0) { + signo = SIGCHLD; + ret = 0; + } + + if (!io_waitid_copy_si(req, signo)) + ret = -EFAULT; + io_waitid_free(req); + return ret; +} + +static void io_waitid_complete(struct io_kiocb *req, int ret) +{ + struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + struct io_tw_state ts = { .locked = true }; + + /* anyone completing better be holding a reference */ + WARN_ON_ONCE(!(atomic_read(&iw->refs) & IO_WAITID_REF_MASK)); + + lockdep_assert_held(&req->ctx->uring_lock); + + /* + * Did cancel find it meanwhile? 
+ */ + if (hlist_unhashed(&req->hash_node)) + return; + + hlist_del_init(&req->hash_node); + + ret = io_waitid_finish(req, ret); + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); + io_req_task_complete(req, &ts); +} + +static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) +{ + struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + struct io_waitid_async *iwa = req->async_data; + + /* + * Mark us canceled regardless of ownership. This will prevent a + * potential retry from a spurious wakeup. + */ + atomic_or(IO_WAITID_CANCEL_FLAG, &iw->refs); + + /* claim ownership */ + if (atomic_fetch_inc(&iw->refs) & IO_WAITID_REF_MASK) + return false; + + spin_lock_irq(&iw->head->lock); + list_del_init(&iwa->wo.child_wait.entry); + spin_unlock_irq(&iw->head->lock); + io_waitid_complete(req, -ECANCELED); + return true; +} + +int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, + unsigned int issue_flags) +{ + struct hlist_node *tmp; + struct io_kiocb *req; + int nr = 0; + + if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED)) + return -ENOENT; + + io_ring_submit_lock(ctx, issue_flags); + hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) { + if (req->cqe.user_data != cd->data && + !(cd->flags & IORING_ASYNC_CANCEL_ANY)) + continue; + if (__io_waitid_cancel(ctx, req)) + nr++; + if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) + break; + } + io_ring_submit_unlock(ctx, issue_flags); + + if (nr) + return nr; + + return -ENOENT; +} + +bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task, + bool cancel_all) +{ + struct hlist_node *tmp; + struct io_kiocb *req; + bool found = false; + + lockdep_assert_held(&ctx->uring_lock); + + hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) { + if (!io_match_task_safe(req, task, cancel_all)) + continue; + __io_waitid_cancel(ctx, req); + found = true; + } + + return found; +} + +static inline bool 
io_waitid_drop_issue_ref(struct io_kiocb *req) +{ + struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + struct io_waitid_async *iwa = req->async_data; + + if (!atomic_sub_return(1, &iw->refs)) + return false; + + /* + * Wakeup triggered, racing with us. It was prevented from + * completing because of that, queue up the tw to do that. + */ + req->io_task_work.func = io_waitid_cb; + io_req_task_work_add(req); + remove_wait_queue(iw->head, &iwa->wo.child_wait); + return true; +} + +static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts) +{ + struct io_waitid_async *iwa = req->async_data; + struct io_ring_ctx *ctx = req->ctx; + int ret; + + io_tw_lock(ctx, ts); + + ret = __do_wait(&iwa->wo); + + /* + * If we get -ERESTARTSYS here, we need to re-arm and check again + * to ensure we get another callback. If the retry works, then we can + * just remove ourselves from the waitqueue again and finish the + * request. + */ + if (unlikely(ret == -ERESTARTSYS)) { + struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + + /* Don't retry if cancel found it meanwhile */ + ret = -ECANCELED; + if (!(atomic_read(&iw->refs) & IO_WAITID_CANCEL_FLAG)) { + iw->head = &current->signal->wait_chldexit; + add_wait_queue(iw->head, &iwa->wo.child_wait); + ret = __do_wait(&iwa->wo); + if (ret == -ERESTARTSYS) { + /* retry armed, drop our ref */ + io_waitid_drop_issue_ref(req); + return; + } + + remove_wait_queue(iw->head, &iwa->wo.child_wait); + } + } + + io_waitid_complete(req, ret); +} + +static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode, + int sync, void *key) +{ + struct wait_opts *wo = container_of(wait, struct wait_opts, child_wait); + struct io_waitid_async *iwa = container_of(wo, struct io_waitid_async, wo); + struct io_kiocb *req = iwa->req; + struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + struct task_struct *p = key; + + if (!pid_child_should_wake(wo, p)) + return 0; + + /* cancel is in progress */ + if
(atomic_fetch_inc(&iw->refs) & IO_WAITID_REF_MASK) + return 1; + + req->io_task_work.func = io_waitid_cb; + io_req_task_work_add(req); + list_del_init(&wait->entry); + return 1; +} + +int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + + if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags) + return -EINVAL; + + iw->which = READ_ONCE(sqe->len); + iw->upid = READ_ONCE(sqe->fd); + iw->options = READ_ONCE(sqe->file_index); + iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + return 0; +} + +int io_waitid(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + struct io_ring_ctx *ctx = req->ctx; + struct io_waitid_async *iwa; + int ret; + + if (io_alloc_async_data(req)) + return -ENOMEM; + + iwa = req->async_data; + iwa->req = req; + + ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info, + iw->options, NULL); + if (ret) + goto done; + + /* + * Mark the request as busy upfront, in case we're racing with the + * wakeup. If we are, then we'll notice when we drop this initial + * reference again after arming. + */ + atomic_set(&iw->refs, 1); + + /* + * Cancel must hold the ctx lock, so there's no risk of cancelation + * finding us until a) we remain on the list, and b) the lock is + * dropped. We only need to worry about racing with the wakeup + * callback. + */ + io_ring_submit_lock(ctx, issue_flags); + hlist_add_head(&req->hash_node, &ctx->waitid_list); + + init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait); + iwa->wo.child_wait.private = req->task; + iw->head = &current->signal->wait_chldexit; + add_wait_queue(iw->head, &iwa->wo.child_wait); + + ret = __do_wait(&iwa->wo); + if (ret == -ERESTARTSYS) { + /* + * Nobody else grabbed a reference, it'll complete when we get + * a waitqueue callback, or if someone cancels it.
+ */ + if (!io_waitid_drop_issue_ref(req)) { + io_ring_submit_unlock(ctx, issue_flags); + return IOU_ISSUE_SKIP_COMPLETE; + } + + /* + * Wakeup triggered, racing with us. It was prevented from + * completing because of that, queue up the tw to do that. + */ + io_ring_submit_unlock(ctx, issue_flags); + return IOU_ISSUE_SKIP_COMPLETE; + } + + hlist_del_init(&req->hash_node); + remove_wait_queue(iw->head, &iwa->wo.child_wait); + ret = io_waitid_finish(req, ret); + + io_ring_submit_unlock(ctx, issue_flags); +done: + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); + return IOU_OK; +} diff --git a/io_uring/waitid.h b/io_uring/waitid.h new file mode 100644 index 000000000000..956a8adafe8c --- /dev/null +++ b/io_uring/waitid.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../kernel/exit.h" + +struct io_waitid_async { + struct io_kiocb *req; + struct wait_opts wo; +}; + +int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_waitid(struct io_kiocb *req, unsigned int issue_flags); +int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, + unsigned int issue_flags); +bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task, + bool cancel_all); -- cgit v1.2.3 From 41e845628511878d6e89e2a9249c095e72aab7eb Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 9 Sep 2023 21:19:32 +0200 Subject: cred: add get_cred_many and put_cred_many Some of the frequent consumers of get_cred and put_cred operate on 2 references on the same creds back-to-back. Switch them to doing the work in one go instead. 
Signed-off-by: Mateusz Guzik [PM: removed changelog from commit description] Signed-off-by: Paul Moore --- include/linux/cred.h | 59 ++++++++++++++++++++++++++++++++++++++++++++-------- kernel/cred.c | 26 +++++++++++++---------- 2 files changed, 65 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index f923528d5cc4..56bc432fe49b 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -218,6 +218,20 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } +/** + * get_new_cred_many - Get references on a new set of credentials + * @cred: The new credentials to reference + * @nr: Number of references to acquire + * + * Get references on the specified set of new credentials. The caller must + * release all acquired references. + */ +static inline struct cred *get_new_cred_many(struct cred *cred, int nr) +{ + atomic_add(nr, &cred->usage); + return cred; +} + /** * get_new_cred - Get a reference on a new set of credentials * @cred: The new credentials to reference @@ -227,16 +241,16 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred(struct cred *cred) { - atomic_inc(&cred->usage); - return cred; + return get_new_cred_many(cred, 1); } /** - * get_cred - Get a reference on a set of credentials + * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference + * @nr: Number of references to acquire * - * Get a reference on the specified set of credentials. The caller must - * release the reference. If %NULL is passed, it is returned with no action. + * Get references on the specified set of credentials. The caller must release + * all acquired references. If %NULL is passed, it is returned with no action. * * This is used to deal with a committed set of credentials.
Although the * pointer is const, this will temporarily discard the const and increment the @@ -244,14 +258,28 @@ static inline struct cred *get_new_cred(struct cred *cred) * accidental alteration of a set of credentials that should be considered * immutable. */ -static inline const struct cred *get_cred(const struct cred *cred) +static inline const struct cred *get_cred_many(const struct cred *cred, int nr) { struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; validate_creds(cred); nonconst_cred->non_rcu = 0; - return get_new_cred(nonconst_cred); + return get_new_cred_many(nonconst_cred, nr); +} + +/* + * get_cred - Get a reference on a set of credentials + * @cred: The credentials to reference + * + * Get a reference on the specified set of credentials. The caller must + * release the reference. If %NULL is passed, it is returned with no action. + * + * This is used to deal with a committed set of credentials. + */ +static inline const struct cred *get_cred(const struct cred *cred) +{ + return get_cred_many(cred, 1); } static inline const struct cred *get_cred_rcu(const struct cred *cred) @@ -269,6 +297,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) /** * put_cred - Release a reference to a set of credentials * @cred: The credentials to release + * @nr: Number of references to release * * Release a reference to a set of credentials, deleting them when the last ref * is released. If %NULL is passed, nothing is done. @@ -277,17 +306,29 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) * on task_struct are attached by const pointers to prevent accidental * alteration of otherwise immutable credential sets. 
*/ -static inline void put_cred(const struct cred *_cred) +static inline void put_cred_many(const struct cred *_cred, int nr) { struct cred *cred = (struct cred *) _cred; if (cred) { validate_creds(cred); - if (atomic_dec_and_test(&(cred)->usage)) + if (atomic_sub_and_test(nr, &cred->usage)) __put_cred(cred); } } +/* + * put_cred - Release a reference to a set of credentials + * @cred: The credentials to release + * + * Release a reference to a set of credentials, deleting them when the last ref + * is released. If %NULL is passed, nothing is done. + */ +static inline void put_cred(const struct cred *cred) +{ + put_cred_many(cred, 1); +} + /** * current_cred - Access the current task's subjective credentials * diff --git a/kernel/cred.c b/kernel/cred.c index 98cb4eca23fb..9398e534b997 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -162,23 +162,29 @@ EXPORT_SYMBOL(__put_cred); */ void exit_creds(struct task_struct *tsk) { - struct cred *cred; + struct cred *real_cred, *cred; kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred, atomic_read(&tsk->cred->usage), read_cred_subscribers(tsk->cred)); - cred = (struct cred *) tsk->real_cred; + real_cred = (struct cred *) tsk->real_cred; tsk->real_cred = NULL; - validate_creds(cred); - alter_cred_subscribers(cred, -1); - put_cred(cred); cred = (struct cred *) tsk->cred; tsk->cred = NULL; + validate_creds(cred); - alter_cred_subscribers(cred, -1); - put_cred(cred); + if (real_cred == cred) { + alter_cred_subscribers(cred, -2); + put_cred_many(cred, 2); + } else { + validate_creds(real_cred); + alter_cred_subscribers(real_cred, -1); + put_cred(real_cred); + alter_cred_subscribers(cred, -1); + put_cred(cred); + } #ifdef CONFIG_KEYS_REQUEST_CACHE key_put(tsk->cached_requested_key); @@ -355,8 +361,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) #endif clone_flags & CLONE_THREAD ) { - p->real_cred = get_cred(p->cred); - get_cred(p->cred); + p->real_cred = get_cred_many(p->cred, 2); 
alter_cred_subscribers(p->cred, 2); kdebug("share_creds(%p{%d,%d})", p->cred, atomic_read(&p->cred->usage), @@ -520,8 +525,7 @@ int commit_creds(struct cred *new) proc_id_connector(task, PROC_EVENT_GID); /* release the old obj and subj refs both */ - put_cred(old); - put_cred(old); + put_cred_many(old, 2); return 0; } EXPORT_SYMBOL(commit_creds); -- cgit v1.2.3 From 8bf0cdfac7f8aa3fa6151b5c5f5eebdb44a64e89 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Sep 2023 11:32:58 +0200 Subject: <linux/list.h>: Introduce the list_for_each_reverse() method The list_head counterpart of list_for_each_entry_reverse() was missing, add it to complete the list handling APIs in <linux/list.h>. [ This new API is also relied on by a WIP scheduler patch, so this variant is not a theoretical possibility only. ] Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org --- include/linux/list.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 164b4d0e9d2a..1837caedf723 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -686,6 +686,14 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) +/** + * list_for_each_reverse - iterate backwards over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_reverse(pos, head) \ + for (pos = (head)->prev; pos != (head); pos = pos->prev) + /** * list_for_each_rcu - Iterate over a list in an RCU-safe fashion * @pos: the &struct list_head to use as a loop cursor. -- cgit v1.2.3 From b8ec60e1186cdcfce41e7db4c827cb107e459002 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 19 Sep 2023 17:17:28 -0700 Subject: x86/speculation, objtool: Use absolute relocations for annotations .discard.retpoline_safe sections do not have the SHF_ALLOC flag.
These sections referencing text sections' STT_SECTION symbols with PC-relative relocations like R_386_PC32 [0] is conceptually not suitable. Newer LLD will report warnings for REL relocations even for relocatable links [1]: ld.lld: warning: vmlinux.a(drivers/i2c/busses/i2c-i801.o):(.discard.retpoline_safe+0x120): has non-ABS relocation R_386_PC32 against symbol '' Switch to absolute relocations instead, which indicate link-time addresses. In a relocatable link, these addresses are also output section offsets, used by checks in tools/objtool/check.c. When linking vmlinux, these .discard.* sections will be discarded, therefore it is not a problem that R_X86_64_32 cannot represent a kernel address. Alternatively, we could set the SHF_ALLOC flag for .discard.* sections, but I think non-SHF_ALLOC for sections to be discarded makes more sense. Note: if we decide to never support REL architectures (e.g. arm, i386), we can utilize R_*_NONE relocations (.reloc ., BFD_RELOC_NONE, sym), making .discard.* sections zero-sized. That said, the section content waste is 4 bytes per entry, much smaller than sizeof(Elf{32,64}_Rel). 
[0] commit 1c0c1faf5692 ("objtool: Use relative pointers for annotations") [1] https://github.com/ClangBuiltLinux/linux/issues/1937 Signed-off-by: Fangrui Song Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20230920001728.1439947-1-maskray@google.com --- arch/x86/include/asm/alternative.h | 4 ++-- arch/x86/include/asm/nospec-branch.h | 4 ++-- include/linux/objtool.h | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 9c4da699e11a..65f79092c9d9 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -58,7 +58,7 @@ #define ANNOTATE_IGNORE_ALTERNATIVE \ "999:\n\t" \ ".pushsection .discard.ignore_alts\n\t" \ - ".long 999b - .\n\t" \ + ".long 999b\n\t" \ ".popsection\n\t" /* @@ -352,7 +352,7 @@ static inline int alternatives_text_reserved(void *start, void *end) .macro ANNOTATE_IGNORE_ALTERNATIVE .Lannotate_\@: .pushsection .discard.ignore_alts - .long .Lannotate_\@ - . + .long .Lannotate_\@ .popsection .endm diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c55cc243592e..4952b73d944e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -196,7 +196,7 @@ .macro ANNOTATE_RETPOLINE_SAFE .Lhere_\@: .pushsection .discard.retpoline_safe - .long .Lhere_\@ - . 
+ .long .Lhere_\@ .popsection .endm @@ -334,7 +334,7 @@ #define ANNOTATE_RETPOLINE_SAFE \ "999:\n\t" \ ".pushsection .discard.retpoline_safe\n\t" \ - ".long 999b - .\n\t" \ + ".long 999b\n\t" \ ".popsection\n\t" typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 03f82c2c2ebf..6f6da95fe7f9 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -48,13 +48,13 @@ #define ANNOTATE_NOENDBR \ "986: \n\t" \ ".pushsection .discard.noendbr\n\t" \ - ".long 986b - .\n\t" \ + ".long 986b\n\t" \ ".popsection\n\t" #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ - ".long 998b - .\n\t" \ + ".long 998b\n\t" \ ".popsection\n\t" #else /* __ASSEMBLY__ */ @@ -66,7 +66,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL \ 999: \ .pushsection .discard.intra_function_calls; \ - .long 999b - .; \ + .long 999b; \ .popsection; /* @@ -118,7 +118,7 @@ .macro ANNOTATE_NOENDBR .Lhere_\@: .pushsection .discard.noendbr - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm @@ -141,7 +141,7 @@ .macro REACHABLE .Lhere_\@: .pushsection .discard.reachable - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm -- cgit v1.2.3 From f1a9be986cedbef839062428e8e9b2bcc9c58af5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:12:00 -0700 Subject: mtd: Annotate struct lpddr_private with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct lpddr_private. 
[1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: linux-mtd@lists.infradead.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230915201159.never.112-kees@kernel.org --- include/linux/mtd/qinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h index 2e3f43788d48..0421f12156b5 100644 --- a/include/linux/mtd/qinfo.h +++ b/include/linux/mtd/qinfo.h @@ -24,7 +24,7 @@ struct lpddr_private { struct qinfo_chip *qinfo; int numchips; unsigned long chipshift; - struct flchip chips[]; + struct flchip chips[] __counted_by(numchips); }; /* qinfo_query_info structure contains request information for -- cgit v1.2.3 From 1442d628d05c7aad86e5754fa554f32edc3e77a8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:12:06 -0700 Subject: mtd: cfi: Annotate struct cfi_private with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct cfi_private. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: linux-mtd@lists.infradead.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. 
Silva Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230915201206.never.107-kees@kernel.org --- include/linux/mtd/cfi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index d88bb56c18e2..947410faf9e2 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -287,7 +287,7 @@ struct cfi_private { unsigned long chipshift; /* Because they're of the same type */ const char *im_name; /* inter_module name for cmdset_setup */ unsigned long quirks; - struct flchip chips[]; /* per-chip data structure for each chip */ + struct flchip chips[] __counted_by(numchips); /* per-chip data structure for each chip */ }; uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, -- cgit v1.2.3 From 48554df6bf2b1e83f70749bf4b4d7914f8b3c01d Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Wed, 13 Sep 2023 15:16:13 +0000 Subject: blk-mq: remove RQF_MQ_INFLIGHT Since the previous patch changed the accounting so that active requests are only counted when a driver tag is actually allocated, RQF_MQ_INFLIGHT can be removed without introducing a double-accounting problem. 1. none elevator: the flush request uses the first pending request's driver tag, so it is not double accounted. 2. other elevator: the flush request is accounted when its driver tag is allocated at issue time, and is unaccounted when it puts the driver tag.
Signed-off-by: Chengming Zhou Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230913151616.3164338-3-chengming.zhou@linux.dev Signed-off-by: Jens Axboe --- block/blk-flush.c | 11 ++--------- block/blk-mq-debugfs.c | 1 - block/blk-mq.c | 4 ---- include/linux/blk-mq.h | 2 -- 4 files changed, 2 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/block/blk-flush.c b/block/blk-flush.c index e73dc22d05c1..3f4d41952ef2 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -323,16 +323,9 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, flush_rq->mq_ctx = first_rq->mq_ctx; flush_rq->mq_hctx = first_rq->mq_hctx; - if (!q->elevator) { + if (!q->elevator) flush_rq->tag = first_rq->tag; - - /* - * We borrow data request's driver tag, so have to mark - * this flush request as INFLIGHT for avoiding double - * account of this driver tag - */ - flush_rq->rq_flags |= RQF_MQ_INFLIGHT; - } else + else flush_rq->internal_tag = first_rq->internal_tag; flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index c3b5930106b2..5cbeb9344f2f 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -246,7 +246,6 @@ static const char *const rqf_name[] = { RQF_NAME(STARTED), RQF_NAME(FLUSH_SEQ), RQF_NAME(MIXED_MERGE), - RQF_NAME(MQ_INFLIGHT), RQF_NAME(DONTPREP), RQF_NAME(SCHED_TAGS), RQF_NAME(USE_SCHED), diff --git a/block/blk-mq.c b/block/blk-mq.c index e776388decc3..c209a7dddee3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1066,10 +1066,6 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, { struct request_queue *q = hctx->queue; - /* - * All requests should have been marked as RQF_MQ_INFLIGHT, so - * update hctx->nr_active in batch - */ blk_mq_sub_active_requests(hctx, nr_tags); blk_mq_put_tags(hctx->tags, tag_array, nr_tags); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 958ed7e89b30..1ab3081c82ed 100644 --- 
a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -32,8 +32,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) /* merge of different types, fail separately */ #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) -/* track inflight for MQ */ -#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) /* use hctx->sched_tags */ -- cgit v1.2.3 From 5f05285df691b1e82108eead7165feae238c95ef Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Tue, 19 Sep 2023 13:40:48 +0530 Subject: iio: hid-sensor-als: Add light color temperature support In most cases, ambient color sensors also support light color temperature. As a result, add support of light color temperature. Signed-off-by: Basavaraj Natikar Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20230919081054.2050714-4-Basavaraj.Natikar@amd.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-als.c | 37 +++++++++++++++++++++++++++++++++++-- include/linux/hid-sensor-ids.h | 1 + 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index efb1f8862b28..16a3f1941c27 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -14,8 +14,9 @@ #include "../common/hid-sensors/hid-sensor-trigger.h" enum { - CHANNEL_SCAN_INDEX_INTENSITY = 0, - CHANNEL_SCAN_INDEX_ILLUM = 1, + CHANNEL_SCAN_INDEX_INTENSITY, + CHANNEL_SCAN_INDEX_ILLUM, + CHANNEL_SCAN_INDEX_COLOR_TEMP, CHANNEL_SCAN_INDEX_MAX }; @@ -65,6 +66,16 @@ static const struct iio_chan_spec als_channels[] = { BIT(IIO_CHAN_INFO_HYSTERESIS_RELATIVE), .scan_index = CHANNEL_SCAN_INDEX_ILLUM, }, + { + .type = IIO_COLORTEMP, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) | + BIT(IIO_CHAN_INFO_SCALE) | + 
BIT(IIO_CHAN_INFO_SAMP_FREQ) | + BIT(IIO_CHAN_INFO_HYSTERESIS) | + BIT(IIO_CHAN_INFO_HYSTERESIS_RELATIVE), + .scan_index = CHANNEL_SCAN_INDEX_COLOR_TEMP, + }, IIO_CHAN_SOFT_TIMESTAMP(CHANNEL_SCAN_INDEX_TIMESTAMP) }; @@ -103,6 +114,11 @@ static int als_read_raw(struct iio_dev *indio_dev, min = als_state->als[chan->scan_index].logical_minimum; address = HID_USAGE_SENSOR_LIGHT_ILLUM; break; + case CHANNEL_SCAN_INDEX_COLOR_TEMP: + report_id = als_state->als[chan->scan_index].report_id; + min = als_state->als[chan->scan_index].logical_minimum; + address = HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE; + break; default: report_id = -1; break; @@ -223,6 +239,10 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev, als_state->scan.illum[CHANNEL_SCAN_INDEX_ILLUM] = sample_data; ret = 0; break; + case HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE: + als_state->scan.illum[CHANNEL_SCAN_INDEX_COLOR_TEMP] = sample_data; + ret = 0; + break; case HID_USAGE_SENSOR_TIME_TIMESTAMP: als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes, *(s64 *)raw_data); @@ -258,6 +278,19 @@ static int als_parse_report(struct platform_device *pdev, st->als[i].report_id); } + ret = sensor_hub_input_get_attribute_info(hsdev, HID_INPUT_REPORT, + usage_id, + HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE, + &st->als[CHANNEL_SCAN_INDEX_COLOR_TEMP]); + if (ret < 0) + return ret; + als_adjust_channel_bit_mask(channels, CHANNEL_SCAN_INDEX_COLOR_TEMP, + st->als[CHANNEL_SCAN_INDEX_COLOR_TEMP].size); + + dev_dbg(&pdev->dev, "als %x:%x\n", + st->als[CHANNEL_SCAN_INDEX_COLOR_TEMP].index, + st->als[CHANNEL_SCAN_INDEX_COLOR_TEMP].report_id); + st->scale_precision = hid_sensor_format_scale(usage_id, &st->als[CHANNEL_SCAN_INDEX_INTENSITY], &st->scale_pre_decml, &st->scale_post_decml); diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 13b1e65fbdcc..8af4fb3e0254 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -21,6 +21,7 @@ 
#define HID_USAGE_SENSOR_ALS 0x200041 #define HID_USAGE_SENSOR_DATA_LIGHT 0x2004d0 #define HID_USAGE_SENSOR_LIGHT_ILLUM 0x2004d1 +#define HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE 0x2004d2 /* PROX (200011) */ #define HID_USAGE_SENSOR_PROX 0x200011 -- cgit v1.2.3 From ee3710f39f9d0ae5137a866138d005fe1ad18132 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Tue, 19 Sep 2023 13:40:52 +0530 Subject: iio: hid-sensor-als: Add light chromaticity support In most cases, ambient color sensors also support the x and y light colors, which represent the coordinates on the CIE 1931 chromaticity diagram. Thus, add light chromaticity x and y. Signed-off-by: Basavaraj Natikar Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20230919081054.2050714-8-Basavaraj.Natikar@amd.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-als.c | 63 ++++++++++++++++++++++++++++++++++++++ include/linux/hid-sensor-ids.h | 3 ++ 2 files changed, 66 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 16a3f1941c27..c9d114ff080a 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -17,6 +17,8 @@ enum { CHANNEL_SCAN_INDEX_INTENSITY, CHANNEL_SCAN_INDEX_ILLUM, CHANNEL_SCAN_INDEX_COLOR_TEMP, + CHANNEL_SCAN_INDEX_CHROMATICITY_X, + CHANNEL_SCAN_INDEX_CHROMATICITY_Y, CHANNEL_SCAN_INDEX_MAX }; @@ -76,6 +78,30 @@ static const struct iio_chan_spec als_channels[] = { BIT(IIO_CHAN_INFO_HYSTERESIS_RELATIVE), .scan_index = CHANNEL_SCAN_INDEX_COLOR_TEMP, }, + { + .type = IIO_CHROMATICITY, + .modified = 1, + .channel2 = IIO_MOD_X, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) | + BIT(IIO_CHAN_INFO_SCALE) | + BIT(IIO_CHAN_INFO_SAMP_FREQ) | + BIT(IIO_CHAN_INFO_HYSTERESIS) | + BIT(IIO_CHAN_INFO_HYSTERESIS_RELATIVE), + .scan_index = CHANNEL_SCAN_INDEX_CHROMATICITY_X, + }, + { + .type = IIO_CHROMATICITY, + .modified 
= 1, + .channel2 = IIO_MOD_Y, + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_OFFSET) | + BIT(IIO_CHAN_INFO_SCALE) | + BIT(IIO_CHAN_INFO_SAMP_FREQ) | + BIT(IIO_CHAN_INFO_HYSTERESIS) | + BIT(IIO_CHAN_INFO_HYSTERESIS_RELATIVE), + .scan_index = CHANNEL_SCAN_INDEX_CHROMATICITY_Y, + }, IIO_CHAN_SOFT_TIMESTAMP(CHANNEL_SCAN_INDEX_TIMESTAMP) }; @@ -119,6 +145,16 @@ static int als_read_raw(struct iio_dev *indio_dev, min = als_state->als[chan->scan_index].logical_minimum; address = HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE; break; + case CHANNEL_SCAN_INDEX_CHROMATICITY_X: + report_id = als_state->als[chan->scan_index].report_id; + min = als_state->als[chan->scan_index].logical_minimum; + address = HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X; + break; + case CHANNEL_SCAN_INDEX_CHROMATICITY_Y: + report_id = als_state->als[chan->scan_index].report_id; + min = als_state->als[chan->scan_index].logical_minimum; + address = HID_USAGE_SENSOR_LIGHT_CHROMATICITY_Y; + break; default: report_id = -1; break; @@ -243,6 +279,14 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev, als_state->scan.illum[CHANNEL_SCAN_INDEX_COLOR_TEMP] = sample_data; ret = 0; break; + case HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X: + als_state->scan.illum[CHANNEL_SCAN_INDEX_CHROMATICITY_X] = sample_data; + ret = 0; + break; + case HID_USAGE_SENSOR_LIGHT_CHROMATICITY_Y: + als_state->scan.illum[CHANNEL_SCAN_INDEX_CHROMATICITY_Y] = sample_data; + ret = 0; + break; case HID_USAGE_SENSOR_TIME_TIMESTAMP: als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes, *(s64 *)raw_data); @@ -291,6 +335,25 @@ static int als_parse_report(struct platform_device *pdev, st->als[CHANNEL_SCAN_INDEX_COLOR_TEMP].index, st->als[CHANNEL_SCAN_INDEX_COLOR_TEMP].report_id); + for (i = 0; i < 2; i++) { + int next_scan_index = CHANNEL_SCAN_INDEX_CHROMATICITY_X + i; + + ret = sensor_hub_input_get_attribute_info(hsdev, + HID_INPUT_REPORT, usage_id, + 
HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X + i, + &st->als[next_scan_index]); + if (ret < 0) + return ret; + + als_adjust_channel_bit_mask(channels, + CHANNEL_SCAN_INDEX_CHROMATICITY_X + i, + st->als[next_scan_index].size); + + dev_dbg(&pdev->dev, "als %x:%x\n", + st->als[next_scan_index].index, + st->als[next_scan_index].report_id); + } + st->scale_precision = hid_sensor_format_scale(usage_id, &st->als[CHANNEL_SCAN_INDEX_INTENSITY], &st->scale_pre_decml, &st->scale_post_decml); diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 8af4fb3e0254..6730ee900ee1 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -22,6 +22,9 @@ #define HID_USAGE_SENSOR_DATA_LIGHT 0x2004d0 #define HID_USAGE_SENSOR_LIGHT_ILLUM 0x2004d1 #define HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE 0x2004d2 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY 0x2004d3 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X 0x2004d4 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_Y 0x2004d5 /* PROX (200011) */ #define HID_USAGE_SENSOR_PROX 0x200011 -- cgit v1.2.3 From a741deac787f0d2d7068638c067db20af9e63752 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 26 Jul 2023 13:57:03 -0700 Subject: torture: Make torture_hrtimeout_ns() take an hrtimer mode parameter The current torture-test sleeps are waiting for a duration, but there are situations where it is better to wait for an absolute time, for example, when ending a stutter interval. This commit therefore adds an hrtimer mode parameter to torture_hrtimeout_ns(). Why not also the other torture_hrtimeout_*() functions? The theory is that most absolute times will be in nanoseconds, especially not (say) jiffies. Signed-off-by: Paul E. 
McKenney Signed-off-by: Frederic Weisbecker --- include/linux/torture.h | 3 ++- kernel/torture.c | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index bb466eec01e4..017f0f710815 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -81,7 +81,8 @@ static inline void torture_random_init(struct torture_random_state *trsp) } /* Definitions for high-resolution-timer sleeps. */ -int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp); +int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode, + struct torture_random_state *trsp); int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state *trsp); int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state *trsp); int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp); diff --git a/kernel/torture.c b/kernel/torture.c index 68dba4ecab5c..6ba62e5993e7 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -87,14 +87,15 @@ EXPORT_SYMBOL_GPL(verbose_torout_sleep); * nanosecond random fuzz. This function and its friends desynchronize * testing from the timer wheel. 
*/ -int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp) +int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode, + struct torture_random_state *trsp) { ktime_t hto = baset_ns; if (trsp) hto += torture_random(trsp) % fuzzt_ns; set_current_state(TASK_IDLE); - return schedule_hrtimeout(&hto, HRTIMER_MODE_REL); + return schedule_hrtimeout(&hto, mode); } EXPORT_SYMBOL_GPL(torture_hrtimeout_ns); @@ -106,7 +107,7 @@ int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state { ktime_t baset_ns = baset_us * NSEC_PER_USEC; - return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp); + return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_us); @@ -123,7 +124,7 @@ int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state fuzzt_ns = (u32)~0U; else fuzzt_ns = fuzzt_us * NSEC_PER_USEC; - return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp); + return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_ms); @@ -136,7 +137,7 @@ int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp) { ktime_t baset_ns = jiffies_to_nsecs(baset_j); - return torture_hrtimeout_ns(baset_ns, jiffies_to_nsecs(1), trsp); + return torture_hrtimeout_ns(baset_ns, jiffies_to_nsecs(1), HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_jiffies); @@ -153,7 +154,7 @@ int torture_hrtimeout_s(u32 baset_s, u32 fuzzt_ms, struct torture_random_state * fuzzt_ns = (u32)~0U; else fuzzt_ns = fuzzt_ms * NSEC_PER_MSEC; - return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp); + return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_s); -- cgit v1.2.3 From 0cfecd7d754f2ef5d7e6b56ee656a8544ade920a Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 31 Jul 2023 13:10:34 -0700 Subject: torture: Move rcutorture_sched_setaffinity() out of rcutorture The rcutorture_sched_setaffinity() function is needed by locktorture, so move its declaration from rcu.h to torture.h and rename it to the more generic torture_sched_setaffinity() name. Please note that use of this function is still restricted to torture tests, and of those, currently only rcutorture and locktorture. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/torture.h | 5 +++++ kernel/rcu/rcu.h | 4 ---- kernel/rcu/rcutorture.c | 2 +- kernel/rcu/update.c | 8 ++++---- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 017f0f710815..c98d0c83d117 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -121,10 +121,15 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #define torture_stop_kthread(n, tp) \ _torture_stop_kthread("Stopping " #n " task", &(tp)) +/* Scheduler-related definitions. 
*/ #ifdef CONFIG_PREEMPTION #define torture_preempt_schedule() __preempt_schedule() #else #define torture_preempt_schedule() do { } while (0) #endif +#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +#endif + #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 98e13be411af..567bd3d72e39 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -568,10 +568,6 @@ void do_trace_rcu_torture_read(const char *rcutorturename, static inline void rcu_gp_set_torture_wait(int duration) { } #endif -#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) -long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); -#endif - #ifdef CONFIG_TINY_SRCU static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ade42d6a9d9b..7e82fb887d09 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -810,7 +810,7 @@ static void synchronize_rcu_trivial(void) int cpu; for_each_online_cpu(cpu) { - rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu)); + torture_sched_setaffinity(current->pid, cpumask_of(cpu)); WARN_ON_ONCE(raw_smp_processor_id() != cpu); } } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 9d3c2e6ba667..c534d6806d3d 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -525,17 +525,17 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); do { } while (0) #endif -#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) +#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) /* Get rcutorture access to sched_setaffinity(). 
*/ -long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask) { int ret; ret = sched_setaffinity(pid, in_mask); - WARN_ONCE(ret, "%s: sched_setaffinity() returned %d\n", __func__, ret); + WARN_ONCE(ret, "%s: sched_setaffinity(%d) returned %d\n", __func__, pid, ret); return ret; } -EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity); +EXPORT_SYMBOL_GPL(torture_sched_setaffinity); #endif #ifdef CONFIG_RCU_STALL_COMMON -- cgit v1.2.3 From 5970fbad1036d1015abe45651628b39b5bcb8a22 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2023 22:54:47 -0700 Subject: fscrypt: make it clearer that key_prefix is deprecated fscrypt_operations::key_prefix should not be set by any filesystems that aren't setting it already. This is already documented, but apparently it's not sufficiently clear, as both ceph and btrfs have tried to set it. Rename the field to legacy_key_prefix and improve the documentation to hopefully make it clearer. 
Link: https://lore.kernel.org/r/20230925055451.59499-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keysetup_v1.c | 5 +++-- fs/ext4/crypto.c | 2 +- fs/f2fs/super.c | 2 +- fs/ubifs/crypto.c | 2 +- include/linux/fscrypt.h | 14 +++++++++----- 5 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 75dabd9b27f9..86b48a2b47d1 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -299,6 +299,7 @@ int fscrypt_setup_v1_file_key(struct fscrypt_info *ci, const u8 *raw_master_key) int fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_info *ci) { + const struct super_block *sb = ci->ci_inode->i_sb; struct key *key; const struct fscrypt_key *payload; int err; @@ -306,8 +307,8 @@ int fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_info *ci) key = find_and_lock_process_key(FSCRYPT_KEY_DESC_PREFIX, ci->ci_policy.v1.master_key_descriptor, ci->ci_mode->keysize, &payload); - if (key == ERR_PTR(-ENOKEY) && ci->ci_inode->i_sb->s_cop->key_prefix) { - key = find_and_lock_process_key(ci->ci_inode->i_sb->s_cop->key_prefix, + if (key == ERR_PTR(-ENOKEY) && sb->s_cop->legacy_key_prefix) { + key = find_and_lock_process_key(sb->s_cop->legacy_key_prefix, ci->ci_policy.v1.master_key_descriptor, ci->ci_mode->keysize, &payload); } diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 453d4da5de52..99a4769a53f6 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -240,7 +240,7 @@ static void ext4_get_ino_and_lblk_bits(struct super_block *sb, } const struct fscrypt_operations ext4_cryptops = { - .key_prefix = "ext4:", + .legacy_key_prefix = "ext4:", .get_context = ext4_get_context, .set_context = ext4_set_context, .get_dummy_policy = ext4_get_dummy_policy, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a8c8232852bb..f60062b558fd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3231,7 +3231,7 @@ static struct block_device 
**f2fs_get_devices(struct super_block *sb, } static const struct fscrypt_operations f2fs_cryptops = { - .key_prefix = "f2fs:", + .legacy_key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, .get_dummy_policy = f2fs_get_dummy_policy, diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 3125e76376ee..1be3e11da3b3 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -89,7 +89,7 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn, const struct fscrypt_operations ubifs_crypt_operations = { .flags = FS_CFLG_OWN_PAGES, - .key_prefix = "ubifs:", + .legacy_key_prefix = "ubifs:", .get_context = ubifs_crypt_get_context, .set_context = ubifs_crypt_set_context, .empty_dir = ubifs_crypt_empty_dir, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index c895b12737a1..b0037566ce30 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -73,12 +73,16 @@ struct fscrypt_operations { unsigned int flags; /* - * If set, this is a filesystem-specific key description prefix that - * will be accepted for "logon" keys for v1 fscrypt policies, in - * addition to the generic prefix "fscrypt:". This functionality is - * deprecated, so new filesystems shouldn't set this field. + * This field exists only for backwards compatibility reasons and should + * only be set by the filesystems that are setting it already. It + * contains the filesystem-specific key description prefix that is + * accepted for "logon" keys for v1 fscrypt policies. This + * functionality is deprecated in favor of the generic prefix + * "fscrypt:", which itself is deprecated in favor of the filesystem + * keyring ioctls such as FS_IOC_ADD_ENCRYPTION_KEY. Filesystems that + * are newly adding fscrypt support should not set this field. */ - const char *key_prefix; + const char *legacy_key_prefix; /* * Get the fscrypt context of the given inode. 
-- cgit v1.2.3 From 40e13e18168fd0f1a6ad10166f5042a21c47ab99 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2023 22:54:48 -0700 Subject: fscrypt: make the bounce page pool opt-in instead of opt-out Replace FS_CFLG_OWN_PAGES with a bit flag 'needs_bounce_pages' which has the opposite meaning. I.e., filesystems now opt into the bounce page pool instead of opt out. Make fscrypt_alloc_bounce_page() check that the bounce page pool has been initialized. I believe the opt-in makes more sense, since nothing else in fscrypt_operations is opt-out, and these days filesystems can choose to use blk-crypto which doesn't need the fscrypt bounce page pool. Also, I happen to be planning to add two more flags, and I wanted to fix the "FS_CFLG_" name anyway as it wasn't prefixed with "FSCRYPT_". Link: https://lore.kernel.org/r/20230925055451.59499-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/ceph/crypto.c | 1 + fs/crypto/crypto.c | 9 ++++++++- fs/ext4/crypto.c | 1 + fs/f2fs/super.c | 1 + fs/ubifs/crypto.c | 1 - include/linux/fscrypt.h | 20 +++++++++++--------- 6 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index e4d5cd56a80b..cc63f1e6fdef 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -133,6 +133,7 @@ static const union fscrypt_policy *ceph_get_dummy_policy(struct super_block *sb) } static struct fscrypt_operations ceph_fscrypt_ops = { + .needs_bounce_pages = 1, .get_context = ceph_crypt_get_context, .set_context = ceph_crypt_set_context, .get_dummy_policy = ceph_get_dummy_policy, diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 6a837e4b80dc..aed0c5ea7578 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -49,6 +49,13 @@ EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags) { + if (WARN_ON_ONCE(!fscrypt_bounce_page_pool)) { + /* + * Oops, the filesystem called a function that uses the bounce + * page 
pool, but it didn't set needs_bounce_pages. + */ + return NULL; + } return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); } @@ -325,7 +332,7 @@ int fscrypt_initialize(struct super_block *sb) return 0; /* No need to allocate a bounce page pool if this FS won't use it. */ - if (sb->s_cop->flags & FS_CFLG_OWN_PAGES) + if (!sb->s_cop->needs_bounce_pages) return 0; mutex_lock(&fscrypt_init_mutex); diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 99a4769a53f6..5cd7bcfae46b 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -240,6 +240,7 @@ static void ext4_get_ino_and_lblk_bits(struct super_block *sb, } const struct fscrypt_operations ext4_cryptops = { + .needs_bounce_pages = 1, .legacy_key_prefix = "ext4:", .get_context = ext4_get_context, .set_context = ext4_set_context, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f60062b558fd..55aa0ed531f2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3231,6 +3231,7 @@ static struct block_device **f2fs_get_devices(struct super_block *sb, } static const struct fscrypt_operations f2fs_cryptops = { + .needs_bounce_pages = 1, .legacy_key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 1be3e11da3b3..921f9033d0d2 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -88,7 +88,6 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn, } const struct fscrypt_operations ubifs_crypt_operations = { - .flags = FS_CFLG_OWN_PAGES, .legacy_key_prefix = "ubifs:", .get_context = ubifs_crypt_get_context, .set_context = ubifs_crypt_set_context, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index b0037566ce30..4505078e89b7 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -59,18 +59,20 @@ struct fscrypt_name { #ifdef CONFIG_FS_ENCRYPTION -/* - * If set, the fscrypt bounce page pool won't be allocated (unless another - * filesystem needs it). 
Set this if the filesystem always uses its own bounce - * pages for writes and therefore won't need the fscrypt bounce page pool. - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - /* Crypto operations for filesystems */ struct fscrypt_operations { - /* Set of optional flags; see above for allowed flags */ - unsigned int flags; + /* + * If set, then fs/crypto/ will allocate a global bounce page pool the + * first time an encryption key is set up for a file. The bounce page + * pool is required by the following functions: + * + * - fscrypt_encrypt_pagecache_blocks() + * - fscrypt_zeroout_range() for files not using inline crypto + * + * If the filesystem doesn't use those, it doesn't need to set this. + */ + unsigned int needs_bounce_pages : 1; /* * This field exists only for backwards compatibility reasons and should -- cgit v1.2.3 From 753a4d531bc518633ea88ac0ed02b25a16823d51 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 22 Sep 2023 22:55:16 +0200 Subject: ata: libata-sata: increase PMP SRST timeout to 10s On certain SATA controllers, softreset fails after wakeup from S2RAM with the message "softreset failed (1st FIS failed)", sometimes resulting in drives not being detected again. With the increased timeout, this issue is avoided. Instead, "softreset failed (device not ready)" is now logged 1-2 times; this later failure seems to cause fewer problems however, and the drives are detected reliably once they've spun up and the probe is retried. 
The issue was observed with the primary SATA controller of the QNAP TS-453B, which is an "Intel Corporation Celeron/Pentium Silver Processor SATA Controller [8086:31e3] (rev 06)" integrated in the Celeron J4125 CPU, and the following drives: - Seagate IronWolf ST12000VN0008 - Seagate IronWolf ST8000NE0004 The SATA controller seems to be more relevant to this issue than the drives, as the same drives are always detected reliably on the secondary SATA controller on the same board (an ASMedia 106x) without any "softreset failed" errors even without the increased timeout. Fixes: e7d3ef13d52a ("libata: change drive ready wait after hard reset to 5s") Cc: stable@vger.kernel.org Signed-off-by: Matthias Schiffer Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index bf4913f4d7ac..84aca8c44fa3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -259,7 +259,7 @@ enum { * advised to wait only for the following duration before * doing SRST. */ - ATA_TMOUT_PMP_SRST_WAIT = 5000, + ATA_TMOUT_PMP_SRST_WAIT = 10000, /* When the LPM policy is set to ATA_LPM_MAX_POWER, there might * be a spurious PHY event, so ignore the first PHY event that -- cgit v1.2.3 From b3239498353484fd6ddeb513df89c4628cd623d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Sep 2023 21:25:12 +0300 Subject: wifi: mac80211: use bandwidth indication element for CSA In CSA, parse the (EHT) bandwidth indication element and use it (in fact prefer it if present). 
Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.43ef01920556.If4f24a61cd634ab1e50eba43899b9e992bf25602@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 23 +++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/mlme.c | 5 +++-- net/mac80211/spectmgmt.c | 13 +++++++++++-- net/mac80211/util.c | 42 +++++++++++++++++++++++++++++------------- 5 files changed, 68 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 340d7e0f6bf7..f11b7022d9eb 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3139,6 +3139,28 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) return len >= needed; } +#define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) + +struct ieee80211_bandwidth_indication { + u8 params; + struct ieee80211_eht_operation_info info; +} __packed; + +static inline bool +ieee80211_bandwidth_indication_size_ok(const u8 *data, u8 len) +{ + const struct ieee80211_bandwidth_indication *bwi = (const void *)data; + + if (len < sizeof(*bwi)) + return false; + + if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT && + len < sizeof(*bwi) + 2) + return false; + + return true; +} + #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) @@ -3596,6 +3618,7 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, + WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; /* Action category code */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d5c5f865323c..e7856336b5c6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1677,6 +1677,7 @@ struct ieee802_11_elems { const struct ieee80211_eht_operation *eht_operation; const struct ieee80211_multi_link_elem *ml_basic; const struct ieee80211_multi_link_elem *ml_reconf; + const struct 
ieee80211_bandwidth_indication *bandwidth_indication; /* length of them, respectively */ u8 ext_capab_len; @@ -2463,7 +2464,7 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, const struct ieee80211_vht_operation *oper, const struct ieee80211_ht_operation *htop, struct cfg80211_chan_def *chandef); -void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper, +void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, bool support_160, bool support_320, struct cfg80211_chan_def *chandef); bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e8f16ed235c3..a211f594f25a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -109,7 +109,8 @@ ieee80211_extract_dis_subch_bmap(const struct ieee80211_eht_operation *eht_oper, return 0; /* set 160/320 supported to get the full AP definition */ - ieee80211_chandef_eht_oper(eht_oper, true, true, &ap_chandef); + ieee80211_chandef_eht_oper((const void *)eht_oper->optional, + true, true, &ap_chandef); ap_center_freq = ap_chandef.center_freq1; ap_bw = 20 * BIT(u8_get_bits(info->control, IEEE80211_EHT_OPER_CHAN_WIDTH)); @@ -387,7 +388,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (eht_oper && (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) { struct cfg80211_chan_def eht_chandef = *chandef; - ieee80211_chandef_eht_oper(eht_oper, + ieee80211_chandef_eht_oper((const void *)eht_oper->optional, eht_chandef.width == NL80211_CHAN_WIDTH_160, false, &eht_chandef); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 871cdac2d0f4..55959b0b24c5 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg - * Copyright (C) 2018, 2020, 2022 Intel Corporation + * Copyright (C) 2018, 2020, 2022-2023 Intel Corporation 
*/ #include @@ -33,12 +33,14 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def new_vht_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; + const struct ieee80211_bandwidth_indication *bwi; int secondary_channel_offset = -1; memset(csa_ie, 0, sizeof(*csa_ie)); sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; + bwi = elems->bandwidth_indication; if (conn_flags & (IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_40MHZ)) { @@ -132,7 +134,14 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, break; } - if (wide_bw_chansw_ie) { + if (bwi) { + /* start with the CSA one */ + new_vht_chandef = csa_ie->chandef; + /* and update the width accordingly */ + /* FIXME: support 160/320 */ + ieee80211_chandef_eht_oper(&bwi->info, true, true, + &new_vht_chandef); + } else if (wide_bw_chansw_ie) { u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1; struct ieee80211_vht_operation vht_oper = { .chan_width = diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 88f714a75862..a1e18938ce52 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -990,6 +990,11 @@ ieee80211_parse_extension_element(u32 *crc, } } break; + case WLAN_EID_EXT_BANDWIDTH_INDICATION: + if (ieee80211_bandwidth_indication_size_ok(data, len)) + elems->bandwidth_indication = data; + calc_crc = true; + break; } if (crc && calc_crc) @@ -1005,11 +1010,11 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, bool calc_crc = params->filter != 0; DECLARE_BITMAP(seen_elems, 256); u32 crc = params->crc; - const u8 *ie; bitmap_zero(seen_elems, 256); for_each_element(elem, params->start, params->len) { + const struct element *subelem; bool elem_parse_failed; u8 id = elem->id; u8 elen = elem->datalen; @@ -1267,15 +1272,27 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, } /* * 
This is a bit tricky, but as we only care about - * the wide bandwidth channel switch element, so - * just parse it out manually. + * a few elements, parse them out manually. */ - ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, - pos, elen); - if (ie) { - if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie)) + subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, + pos, elen); + if (subelem) { + if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie)) elems->wide_bw_chansw_ie = - (void *)(ie + 2); + (void *)subelem->data; + else + elem_parse_failed = true; + } + + subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION, + pos, elen); + if (subelem) { + const void *edata = subelem->data + 1; + u8 edatalen = subelem->datalen - 1; + + if (ieee80211_bandwidth_indication_size_ok(edata, + edatalen)) + elems->bandwidth_indication = edata; else elem_parse_failed = true; } @@ -3746,12 +3763,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, return true; } -void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper, +void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, bool support_160, bool support_320, struct cfg80211_chan_def *chandef) { - struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional; - chandef->center_freq1 = ieee80211_channel_to_frequency(info->ccfs0, chandef->chan->band); @@ -3920,8 +3935,9 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, support_320 = eht_phy_cap & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; - ieee80211_chandef_eht_oper(eht_oper, support_160, - support_320, &he_chandef); + ieee80211_chandef_eht_oper((const void *)eht_oper->optional, + support_160, support_320, + &he_chandef); } if (!cfg80211_chandef_valid(&he_chandef)) { -- cgit v1.2.3 From 62e9c64eedfeb697ba28081ccaac59a45f9a96e1 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Wed, 20 Sep 2023 21:25:24 +0300 Subject: wifi: mac80211: add support for 
parsing TID to Link mapping element Add the relevant definitions for TID to Link mapping element according to the P802.11be_D4.0. Signed-off-by: Ayala Beker Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.9ea9b0b4412a.I2281ab2c70e8b43a39032dc115db6a80f1f0b3f4@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/util.c | 8 +++++++ 3 files changed, 69 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f11b7022d9eb..f2965ff3d7c1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1246,6 +1246,30 @@ struct ieee80211_twt_setup { u8 params[]; } __packed; +#define IEEE80211_TTLM_MAX_CNT 2 +#define IEEE80211_TTLM_CONTROL_DIRECTION 0x03 +#define IEEE80211_TTLM_CONTROL_DEF_LINK_MAP 0x04 +#define IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT 0x08 +#define IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT 0x10 +#define IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE 0x20 + +#define IEEE80211_TTLM_DIRECTION_DOWN 0 +#define IEEE80211_TTLM_DIRECTION_UP 1 +#define IEEE80211_TTLM_DIRECTION_BOTH 2 + +/** + * struct ieee80211_ttlm_elem - TID-To-Link Mapping element + * + * Defined in section 9.4.2.314 in P802.11be_D4 + * + * @control: the first part of control field + * @optional: the second part of control field + */ +struct ieee80211_ttlm_elem { + u8 control; + u8 optional[]; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -3618,6 +3642,7 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, + WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109, WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; @@ -5155,6 +5180,39 @@ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, fixed + prof->sta_info_len - 1 <= len; } +static inline bool 
ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) +{ + const struct ieee80211_ttlm_elem *t2l = (const void *)data; + u8 control, fixed = sizeof(*t2l), elem_len = 0; + + if (len < fixed) + return false; + + control = t2l->control; + + if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) + elem_len += 2; + if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) + elem_len += 3; + + if (!(control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP)) { + u8 bm_size; + + elem_len += 1; + if (len < fixed + elem_len) + return false; + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + bm_size = 1; + else + bm_size = 2; + + elem_len += hweight8(t2l->optional[0]) * bm_size; + } + + return len >= fixed + elem_len; +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e7856336b5c6..d1a73095c914 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1678,6 +1678,7 @@ struct ieee802_11_elems { const struct ieee80211_multi_link_elem *ml_basic; const struct ieee80211_multi_link_elem *ml_reconf; const struct ieee80211_bandwidth_indication *bandwidth_indication; + const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT]; /* length of them, respectively */ u8 ext_capab_len; @@ -1711,6 +1712,8 @@ struct ieee802_11_elems { /* The reconfiguration Multi-Link element in the original IEs */ const struct element *ml_reconf_elem; + u8 ttlm_num; + /* * store the per station profile pointer and length in case that the * parsing also handled Multi-Link element parsing for a specific link diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 97c5823da0eb..98a3bffc6991 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -995,6 +995,14 @@ ieee80211_parse_extension_element(u32 *crc, elems->bandwidth_indication = data; calc_crc = true; break; + case WLAN_EID_EXT_TID_TO_LINK_MAPPING: + calc_crc = 
true; + if (ieee80211_tid_to_link_map_size_ok(data, len) && + elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) { + elems->ttlm[elems->ttlm_num] = (void *)data; + elems->ttlm_num++; + } + break; } if (crc && calc_crc) -- cgit v1.2.3 From ef246a1480cc484cd2aeda75737cb0848616ddf3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Sep 2023 21:25:26 +0300 Subject: wifi: mac80211: support antenna control in injection Support antenna control for injection by parsing the antenna radiotap field (which may be presented multiple times) and telling the driver about the resulting antenna bitmap. Of course there's no guarantee the driver will actually honour this, just like any other injection control. If misconfigured, i.e. the injected HT/VHT MCS needs more chains than antennas are configured, the bitmap is reset to zero, indicating no selection. For now this is only set up for two antennas so we keep more free bits, but that can be trivially extended if any driver implements support for it that can deal with hardware with more antennas. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230920211508.f71001aa4da9.I00ccb762a806ea62bc3d728fa3a0d29f4f285eeb@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ include/net/mac80211.h | 6 +++++- net/mac80211/tx.c | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f2965ff3d7c1..3b02f038d509 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1705,6 +1705,8 @@ struct ieee80211_mcs_info { #define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 #define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 +#define IEEE80211_HT_MCS_CHAINS(mcs) ((mcs) == 32 ? 
1 : (1 + ((mcs) >> 3))) + /* * 802.11n D5.0 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d4ef2a605cb4..72375eceb786 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1178,7 +1178,11 @@ struct ieee80211_tx_info { u8 use_cts_prot:1; u8 short_preamble:1; u8 skip_table:1; - /* 2 bytes free */ + + /* for injection only (bitmap) */ + u8 antennas:2; + + /* 14 bits free */ }; /* only needed before rate control */ unsigned long jiffies; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 932516f8cc13..a984fc54644e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2162,6 +2162,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, rate_found = true; break; + case IEEE80211_RADIOTAP_ANTENNA: + /* this can appear multiple times, keep a bitmap */ + info->control.antennas |= BIT(*iterator.this_arg); + break; + case IEEE80211_RADIOTAP_DATA_RETRIES: rate_retries = *iterator.this_arg; break; @@ -2256,8 +2261,17 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, } if (rate_flags & IEEE80211_TX_RC_MCS) { + /* reset antennas if not enough */ + if (IEEE80211_HT_MCS_CHAINS(rate) > + hweight8(info->control.antennas)) + info->control.antennas = 0; + info->control.rates[0].idx = rate; } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) { + /* reset antennas if not enough */ + if (vht_nss > hweight8(info->control.antennas)) + info->control.antennas = 0; + ieee80211_rate_set_vht(info->control.rates, vht_mcs, vht_nss); } else if (sband) { -- cgit v1.2.3 From 823a0258912b1a4f627e455bee7e78d349c29c5b Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Mon, 25 Sep 2023 16:08:59 +0800 Subject: wifi: ieee80211: add UL-bandwidth definition of trigger frame Define UL-bandwidth values of trigger frame according to 802.11 std. 
Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Link: https://lore.kernel.org/r/20230925080902.51449-2-pkshih@realtek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 3b02f038d509..62b4469c6866 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -307,6 +307,13 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_TRIGGER_TYPE_BQRP 0x6 #define IEEE80211_TRIGGER_TYPE_NFRP 0x7 +/* UL-bandwidth within common_info of trigger frame */ +#define IEEE80211_TRIGGER_ULBW_MASK 0xc0000 +#define IEEE80211_TRIGGER_ULBW_20MHZ 0x0 +#define IEEE80211_TRIGGER_ULBW_40MHZ 0x1 +#define IEEE80211_TRIGGER_ULBW_80MHZ 0x2 +#define IEEE80211_TRIGGER_ULBW_160_80P80MHZ 0x3 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; -- cgit v1.2.3 From df31b298477e65a01deff0af352be3a61524d930 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Sep 2023 10:43:34 -0300 Subject: iommu: Add iommu_ops->identity_domain This allows a driver to set a global static to an IDENTITY domain and the core code will automatically use it whenever an IDENTITY domain is requested. By making it always available it means the IDENTITY can be used in error handling paths to force the iommu driver into a known state. Devices implementing global static identity domains should avoid failing their attach_dev ops. To make global static domains simpler allow drivers to omit their free function and update the iommufd selftest. Convert rockchip to use the new mechanism. 
Tested-by: Steven Price Tested-by: Marek Szyprowski Tested-by: Nicolin Chen Reviewed-by: Lu Baolu Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 6 +++++- drivers/iommu/iommufd/selftest.c | 5 ----- drivers/iommu/rockchip-iommu.c | 9 +-------- include/linux/iommu.h | 3 +++ 4 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3bfc56df4f78..33bd11070907 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1978,6 +1978,9 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, if (bus == NULL || bus->iommu_ops == NULL) return NULL; + if (alloc_type == IOMMU_DOMAIN_IDENTITY && bus->iommu_ops->identity_domain) + return bus->iommu_ops->identity_domain; + domain = bus->iommu_ops->domain_alloc(alloc_type); if (!domain) return NULL; @@ -2011,7 +2014,8 @@ void iommu_domain_free(struct iommu_domain *domain) if (domain->type == IOMMU_DOMAIN_SVA) mmdrop(domain->mm); iommu_put_dma_cookie(domain); - domain->ops->free(domain); + if (domain->ops->free) + domain->ops->free(domain); } EXPORT_SYMBOL_GPL(iommu_domain_free); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 56506d5753f1..d48a202a7c3b 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -111,10 +111,6 @@ struct selftest_obj { }; }; -static void mock_domain_blocking_free(struct iommu_domain *domain) -{ -} - static int mock_domain_nop_attach(struct iommu_domain *domain, struct device *dev) { @@ -122,7 +118,6 @@ static int mock_domain_nop_attach(struct iommu_domain *domain, } static const struct iommu_domain_ops mock_blocking_ops = { - .free = mock_domain_blocking_free, .attach_dev = mock_domain_nop_attach, }; diff --git a/drivers/iommu/rockchip-iommu.c 
b/drivers/iommu/rockchip-iommu.c index 8ff69fbf9f65..033678f2f8b3 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -989,13 +989,8 @@ static int rk_iommu_identity_attach(struct iommu_domain *identity_domain, return 0; } -static void rk_iommu_identity_free(struct iommu_domain *domain) -{ -} - static struct iommu_domain_ops rk_identity_ops = { .attach_dev = rk_iommu_identity_attach, - .free = rk_iommu_identity_free, }; static struct iommu_domain rk_identity_domain = { @@ -1059,9 +1054,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) { struct rk_iommu_domain *rk_domain; - if (type == IOMMU_DOMAIN_IDENTITY) - return &rk_identity_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) return NULL; @@ -1186,6 +1178,7 @@ static int rk_iommu_of_xlate(struct device *dev, } static const struct iommu_ops rk_iommu_ops = { + .identity_domain = &rk_identity_domain, .domain_alloc = rk_iommu_domain_alloc, .probe_device = rk_iommu_probe_device, .release_device = rk_iommu_release_device, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c50a769d569a..d0920b2a9f1c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -260,6 +260,8 @@ struct iommu_iotlb_gather { * will be blocked by the hardware. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops + * @identity_domain: An always available, always attachable identity + * translation. 
*/ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -294,6 +296,7 @@ struct iommu_ops { const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; + struct iommu_domain *identity_domain; }; /** -- cgit v1.2.3 From 1c68cbc64fe6ac01dc242ba562344303031a76fb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Sep 2023 10:43:35 -0300 Subject: iommu: Add IOMMU_DOMAIN_PLATFORM This is used when the iommu driver is taking control of the dma_ops, currently only on S390 and power spapr. It is designed to preserve the original ops->detach_dev() semantic that these S390 was built around. Provide an opaque domain type and a 'default_domain' ops value that allows the driver to trivially force any single domain as the default domain. Update iommufd selftest to use this instead of set_platform_dma_ops Reviewed-by: Lu Baolu Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 13 +++++++++++++ drivers/iommu/iommufd/selftest.c | 14 +++++--------- include/linux/iommu.h | 8 ++++++++ 3 files changed, 26 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 33bd11070907..0e13e566581c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -184,6 +184,8 @@ static const char *iommu_domain_type_str(unsigned int t) case IOMMU_DOMAIN_DMA: case IOMMU_DOMAIN_DMA_FQ: return "Translated"; + case IOMMU_DOMAIN_PLATFORM: + return "Platform"; default: return "Unknown"; } @@ -1752,6 +1754,17 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) lockdep_assert_held(&group->mutex); + /* + * Allow legacy drivers to specify the domain that will be the default + * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM + * domain. Do not use in new drivers. 
+ */ + if (bus->iommu_ops->default_domain) { + if (req_type) + return ERR_PTR(-EINVAL); + return bus->iommu_ops->default_domain; + } + if (req_type) return __iommu_group_alloc_default_domain(bus, group, req_type); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d48a202a7c3b..fb981ba97c4e 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -281,14 +281,6 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap) return cap == IOMMU_CAP_CACHE_COHERENCY; } -static void mock_domain_set_plaform_dma_ops(struct device *dev) -{ - /* - * mock doesn't setup default domains because we can't hook into the - * normal probe path - */ -} - static struct iommu_device mock_iommu_device = { }; @@ -298,12 +290,16 @@ static struct iommu_device *mock_probe_device(struct device *dev) } static const struct iommu_ops mock_ops = { + /* + * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type() + * because it is zero. + */ + .default_domain = &mock_blocking_domain, .owner = THIS_MODULE, .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .hw_info = mock_domain_hw_info, .domain_alloc = mock_domain_alloc, .capable = mock_domain_capable, - .set_platform_dma_ops = mock_domain_set_plaform_dma_ops, .device_group = generic_device_group, .probe_device = mock_probe_device, .default_domain_ops = diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d0920b2a9f1c..a05480be05fd 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -64,6 +64,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ #define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ +#define __IOMMU_DOMAIN_PLATFORM (1U << 5) #define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ /* @@ -81,6 +82,8 @@ struct iommu_domain_geometry { * invalidation. * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses * represented by mm_struct's. 
+ * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own + * dma_api stuff. Do not use in new drivers. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -91,6 +94,7 @@ struct iommu_domain_geometry { __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) #define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) +#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM) struct iommu_domain { unsigned type; @@ -262,6 +266,9 @@ struct iommu_iotlb_gather { * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity * translation. + * @default_domain: If not NULL this will always be set as the default domain. + * This should be an IDENTITY/BLOCKED/PLATFORM domain. + * Do not use in new drivers. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -297,6 +304,7 @@ struct iommu_ops { unsigned long pgsize_bitmap; struct module *owner; struct iommu_domain *identity_domain; + struct iommu_domain *default_domain; }; /** -- cgit v1.2.3 From 24b1d476167df3e30c7a53b67765bf3c787c5160 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Sep 2023 10:43:48 -0300 Subject: iommu: Remove ops->set_platform_dma_ops() All drivers are now using IDENTITY or PLATFORM domains for what this did, we can remove it now. It is no longer possible to attach to a NULL domain. 
Tested-by: Heiko Stuebner Tested-by: Niklas Schnelle Tested-by: Steven Price Tested-by: Marek Szyprowski Tested-by: Nicolin Chen Reviewed-by: Lu Baolu Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/15-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 30 +++++------------------------- include/linux/iommu.h | 4 ---- 2 files changed, 5 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1efd6351bbc2..42a4585dd76d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2351,21 +2351,8 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group, if (group->domain == new_domain) return 0; - /* - * New drivers should support default domains, so set_platform_dma() - * op will never be called. Otherwise the NULL domain represents some - * platform specific behavior. - */ - if (!new_domain) { - for_each_group_device(group, gdev) { - const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); - - if (!WARN_ON(!ops->set_platform_dma_ops)) - ops->set_platform_dma_ops(gdev->dev); - } - group->domain = NULL; - return 0; - } + if (WARN_ON(!new_domain)) + return -EINVAL; /* * Changing the domain is done by calling attach_dev() on the new @@ -2401,19 +2388,15 @@ err_revert: */ last_gdev = gdev; for_each_group_device(group, gdev) { - const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); - /* - * If set_platform_dma_ops is not present a NULL domain can - * happen only for first probe, in which case we leave - * group->domain as NULL and let release clean everything up. + * A NULL domain can happen only for first probe, in which case + * we leave group->domain as NULL and let release clean + * everything up. 
*/ if (group->domain) WARN_ON(__iommu_device_set_domain( group, gdev->dev, group->domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); - else if (ops->set_platform_dma_ops) - ops->set_platform_dma_ops(gdev->dev); if (gdev == last_gdev) break; } @@ -3036,9 +3019,6 @@ static int iommu_setup_default_domain(struct iommu_group *group, /* * There are still some drivers which don't support default domains, so * we ignore the failure and leave group->default_domain NULL. - * - * We assume that the iommu driver starts up the device in - * 'set_platform_dma_ops' mode if it does not support default domains. */ dom = iommu_group_alloc_default_domain(group, req_type); if (!dom) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a05480be05fd..511dfeea5272 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -243,9 +243,6 @@ struct iommu_iotlb_gather { * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain - * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op - * is to support old IOMMU drivers, new drivers should use - * default domains, and the common IOMMU DMA ops. 
* @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -280,7 +277,6 @@ struct iommu_ops { struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); - void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ -- cgit v1.2.3 From 4601cd2d7c4c82c4bafc822e1ff630a709eff206 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 13 Sep 2023 10:43:55 -0300 Subject: iommu: Add ops->domain_alloc_paging() This callback requests the driver to create only a __IOMMU_DOMAIN_PAGING domain, so it saves a few lines in a lot of drivers needlessly checking the type. More critically, this allows us to sweep out all the IOMMU_DOMAIN_UNMANAGED and IOMMU_DOMAIN_DMA checks from a lot of the drivers, simplifying what is going on in the code and ultimately removing the now-unused special cases in drivers where they did not support IOMMU_DOMAIN_DMA. domain_alloc_paging() should return a struct iommu_domain that is functionally compatible with ARM_DMA_USE_IOMMU, dma-iommu.c and iommufd. Be forwards looking and pass in a 'struct device *' argument. We can provide this when allocating the default_domain. No drivers will look at this. 
Tested-by: Steven Price Tested-by: Marek Szyprowski Tested-by: Nicolin Chen Reviewed-by: Lu Baolu Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/22-v8-81230027b2fa+9d-iommu_all_defdom_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 17 ++++++++++++++--- include/linux/iommu.h | 3 +++ 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 38856d542afc..fe033043be46 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2041,6 +2041,7 @@ void iommu_set_fault_handler(struct iommu_domain *domain, EXPORT_SYMBOL_GPL(iommu_set_fault_handler); static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, + struct device *dev, unsigned int type) { struct iommu_domain *domain; @@ -2048,8 +2049,13 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, if (alloc_type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain) return ops->identity_domain; + else if (type & __IOMMU_DOMAIN_PAGING && ops->domain_alloc_paging) + domain = ops->domain_alloc_paging(dev); + else if (ops->domain_alloc) + domain = ops->domain_alloc(alloc_type); + else + return NULL; - domain = ops->domain_alloc(alloc_type); if (!domain) return NULL; @@ -2074,14 +2080,19 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, static struct iommu_domain * __iommu_group_domain_alloc(struct iommu_group *group, unsigned int type) { - return __iommu_domain_alloc(group_iommu_ops(group), type); + struct device *dev = + list_first_entry(&group->devices, struct group_device, list) + ->dev; + + return __iommu_domain_alloc(group_iommu_ops(group), dev, type); } struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { if (bus == NULL || bus->iommu_ops == NULL) return NULL; - return __iommu_domain_alloc(bus->iommu_ops, IOMMU_DOMAIN_UNMANAGED); + return 
__iommu_domain_alloc(bus->iommu_ops, NULL, + IOMMU_DOMAIN_UNMANAGED); } EXPORT_SYMBOL_GPL(iommu_domain_alloc); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 511dfeea5272..3f173307434d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -239,6 +239,8 @@ struct iommu_iotlb_gather { * use. The information type is one of enum iommu_hw_info_type defined * in include/uapi/linux/iommufd.h. * @domain_alloc: allocate iommu domain + * @domain_alloc_paging: Allocate an iommu_domain that can be used for + * UNMANAGED, DMA, and DMA_FQ domain types. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU @@ -273,6 +275,7 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); -- cgit v1.2.3 From e8f52d84cf0b6d1862ab62f7ed705f78690d11b2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 22 Aug 2023 13:15:57 -0300 Subject: iommu: Add generic_single_device_group() This implements the common pattern seen in drivers of a single iommu_group for the entire iommu driver instance. Implement this in core code so the drivers that want this can select it from their ops. 
Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v1-c869a95191f2+5e8-iommu_single_grp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 28 +++++++++++++++++++++++++++- include/linux/iommu.h | 3 +++ 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6483d6e19a5e..03202314322b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -292,6 +292,10 @@ void iommu_device_unregister(struct iommu_device *iommu) spin_lock(&iommu_device_lock); list_del(&iommu->list); spin_unlock(&iommu_device_lock); + + /* Pairs with the alloc in generic_single_device_group() */ + iommu_group_put(iommu->singleton_group); + iommu->singleton_group = NULL; } EXPORT_SYMBOL_GPL(iommu_device_unregister); @@ -406,6 +410,7 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) ret = PTR_ERR(iommu_dev); goto err_module_put; } + dev->iommu->iommu_dev = iommu_dev; ret = iommu_device_link(iommu_dev, dev); if (ret) @@ -420,7 +425,6 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) } dev->iommu_group = group; - dev->iommu->iommu_dev = iommu_dev; dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); if (ops->is_attach_deferred) dev->iommu->attach_deferred = ops->is_attach_deferred(dev); @@ -434,6 +438,7 @@ err_release: err_module_put: module_put(ops->owner); err_free: + dev->iommu->iommu_dev = NULL; dev_iommu_free(dev); return ret; } @@ -1637,6 +1642,27 @@ struct iommu_group *generic_device_group(struct device *dev) } EXPORT_SYMBOL_GPL(generic_device_group); +/* + * Generic device_group call-back function. It just allocates one + * iommu-group per iommu driver instance shared by every device + * probed by that iommu driver. 
+ */ +struct iommu_group *generic_single_device_group(struct device *dev) +{ + struct iommu_device *iommu = dev->iommu->iommu_dev; + + if (!iommu->singleton_group) { + struct iommu_group *group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return group; + iommu->singleton_group = group; + } + return iommu_group_ref_get(iommu->singleton_group); +} +EXPORT_SYMBOL_GPL(generic_single_device_group); + /* * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3f173307434d..5b693be3d35f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -378,6 +378,7 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs */ struct iommu_device { @@ -385,6 +386,7 @@ struct iommu_device { const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + struct iommu_group *singleton_group; u32 max_pasids; }; @@ -648,6 +650,7 @@ extern struct iommu_group *pci_device_group(struct device *dev); extern struct iommu_group *generic_device_group(struct device *dev); /* FSL-MC device grouping function */ struct iommu_group *fsl_mc_device_group(struct device *dev); +extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data -- cgit v1.2.3 From 950210887670cbb7d2eb9af6fb743b70f1a1ebdc Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 19 Sep 2023 20:54:37 +0200 Subject: thermal: core: Drop trips_disabled bitmask After recent changes, thermal_zone_get_trip() cannot fail, as invoked from thermal_zone_device_register_with_trips(), so the only role of the trips_disabled bitmask in struct thermal_zone_device is to make handle_thermal_trip() skip trip points whose temperature was initially zero. However, since the unit of temperature in the thermal core is millicelsius, zero may very well be a valid temperature value at least in some usage scenarios and the trip temperature may as well change later. Thus there is no reason to permanently disable trip points with initial temperature equal to zero. Accordingly, drop the trips_disabled bitmask along with the code related to it. Signed-off-by: Rafael J. Wysocki Tested-by: Ido Schimmel Acked-by: Daniel Lezcano --- drivers/thermal/thermal_core.c | 13 ------------- include/linux/thermal.h | 2 -- 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 58533ea75cd9..38d393f139d8 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -347,10 +347,6 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip_id) { struct thermal_trip trip; - /* Ignore disabled trip points */ - if (test_bit(trip_id, &tz->trips_disabled)) - return; - __thermal_zone_get_trip(tz, trip_id, &trip); if (trip.temperature == THERMAL_TEMP_INVALID) @@ -1231,7 +1227,6 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t struct thermal_zone_device *tz; int id; int result; - int count; struct thermal_governor *governor; if (!type || strlen(type) == 0) { @@ -1328,14 +1323,6 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t if (result) goto release_device; - for (count = 0; count < num_trips; count++) { - struct thermal_trip trip; - - result = thermal_zone_get_trip(tz, count,
&trip); - if (result || !trip.temperature) - set_bit(count, &tz->trips_disabled); - } - /* Update 'this' zone's governor information */ mutex_lock(&thermal_governor_lock); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a5ae4af955ff..6cfcae22ba12 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -122,7 +122,6 @@ struct thermal_cooling_device { * @devdata: private pointer for device private data * @trips: an array of struct thermal_trip * @num_trips: number of trip points the thermal zone supports - * @trips_disabled; bitmap for disabled trips * @passive_delay_jiffies: number of jiffies to wait between polls when * performing passive cooling. * @polling_delay_jiffies: number of jiffies to wait between polls when @@ -163,7 +162,6 @@ struct thermal_zone_device { void *devdata; struct thermal_trip *trips; int num_trips; - unsigned long trips_disabled; /* bitmap for disabled trips */ unsigned long passive_delay_jiffies; unsigned long polling_delay_jiffies; int temperature; -- cgit v1.2.3 From bd111e987e762d82dc738232c6ed4b3c9bcc5c91 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 12 Sep 2023 17:18:43 +0100 Subject: iommu: Retire map/unmap ops With everyone now implementing the new interfaces, clean up the last remnants of the old map/unmap ops and simplify the calling logic again. 
Signed-off-by: Robin Murphy Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/d2afdf13b2fbf537713c3ec642dfd49d16dd9e6a.1694525662.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 60 +++++++++++---------------------------------------- include/linux/iommu.h | 6 ------ 2 files changed, 13 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 03202314322b..865fa4f179a8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2509,30 +2509,6 @@ out_set_count: return pgsize; } -static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, - gfp_t gfp, size_t *mapped) -{ - const struct iommu_domain_ops *ops = domain->ops; - size_t pgsize, count; - int ret; - - pgsize = iommu_pgsize(domain, iova, paddr, size, &count); - - pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", - iova, &paddr, pgsize, count); - - if (ops->map_pages) { - ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, - gfp, mapped); - } else { - ret = ops->map(domain, iova, paddr, pgsize, prot, gfp); - *mapped = ret ? 
0 : pgsize; - } - - return ret; -} - static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { @@ -2543,8 +2519,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t orig_paddr = paddr; int ret = 0; - if (unlikely(!(ops->map || ops->map_pages) || - domain->pgsize_bitmap == 0UL)) + if (unlikely(!ops->map_pages || domain->pgsize_bitmap == 0UL)) return -ENODEV; if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) @@ -2567,10 +2542,14 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); while (size) { - size_t mapped = 0; + size_t pgsize, count, mapped = 0; + + pgsize = iommu_pgsize(domain, iova, paddr, size, &count); - ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp, - &mapped); + pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", + iova, &paddr, pgsize, count); + ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, + gfp, &mapped); /* * Some pages may have been mapped, even if an error occurred, * so we should account for those so they can be unmapped. @@ -2614,19 +2593,6 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, } EXPORT_SYMBOL_GPL(iommu_map); -static size_t __iommu_unmap_pages(struct iommu_domain *domain, - unsigned long iova, size_t size, - struct iommu_iotlb_gather *iotlb_gather) -{ - const struct iommu_domain_ops *ops = domain->ops; - size_t pgsize, count; - - pgsize = iommu_pgsize(domain, iova, iova, size, &count); - return ops->unmap_pages ? 
- ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) : - ops->unmap(domain, iova, pgsize, iotlb_gather); -} - static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) @@ -2636,8 +2602,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long orig_iova = iova; unsigned int min_pagesz; - if (unlikely(!(ops->unmap || ops->unmap_pages) || - domain->pgsize_bitmap == 0UL)) + if (unlikely(!ops->unmap_pages || domain->pgsize_bitmap == 0UL)) return 0; if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) @@ -2664,9 +2629,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain, * or we hit an area that isn't mapped. */ while (unmapped < size) { - unmapped_page = __iommu_unmap_pages(domain, iova, - size - unmapped, - iotlb_gather); + size_t pgsize, count; + + pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count); + unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather); if (!unmapped_page) break; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5b693be3d35f..64bd20142cbe 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -322,10 +322,8 @@ struct iommu_ops { * * ENODEV - device specific errors, not able to be attached * * - treated as ENODEV by the caller. Use is discouraged * @set_dev_pasid: set an iommu domain to a pasid of device - * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. 
- * @unmap: unmap a physically contiguous memory region from an iommu domain * @unmap_pages: unmap a number of pages of the same size from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware @@ -344,13 +342,9 @@ struct iommu_domain_ops { int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped); - size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size, struct iommu_iotlb_gather *iotlb_gather); size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova, size_t pgsize, size_t pgcount, struct iommu_iotlb_gather *iotlb_gather); -- cgit v1.2.3 From cf08fa74c716cf20e5038d1e7dbbd7dba1b76062 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 13 Sep 2023 16:29:17 +0800 Subject: regulator: mt6358: Add output voltage fine tuning to fixed regulators The "fixed" LDO regulators found on the MT6358 and MT6366 PMICs have either no voltage selection register, or only one valid setting. However these do have a fine voltage calibration setting that can slightly boost the output voltage from 0 mV to 100 mV, in 10 mV increments. Add support for this by changing these into linear range regulators. Some register definitions that are missing are also added. 
Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Acked-by: Lee Jones Link: https://lore.kernel.org/r/20230913082919.1631287-3-wenst@chromium.org Signed-off-by: Mark Brown --- drivers/regulator/mt6358-regulator.c | 15 +++++++++++++-- include/linux/mfd/mt6358/registers.h | 6 ++++++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c index a1eae45f5fee..b34ad85db771 100644 --- a/drivers/regulator/mt6358-regulator.c +++ b/drivers/regulator/mt6358-regulator.c @@ -123,10 +123,13 @@ struct mt6358_regulator_info { .type = REGULATOR_VOLTAGE, \ .id = MT6358_ID_##vreg, \ .owner = THIS_MODULE, \ - .n_voltages = 1, \ + .n_voltages = 11, \ + .vsel_reg = MT6358_##vreg##_ANA_CON0, \ + .vsel_mask = GENMASK(3, 0), \ .enable_reg = enreg, \ .enable_mask = BIT(enbit), \ .min_uV = volt, \ + .uV_step = 10000, \ }, \ .status_reg = MT6358_LDO_##vreg##_CON1, \ .qi = BIT(15), \ @@ -219,10 +222,13 @@ struct mt6358_regulator_info { .type = REGULATOR_VOLTAGE, \ .id = MT6366_ID_##vreg, \ .owner = THIS_MODULE, \ - .n_voltages = 1, \ + .n_voltages = 11, \ + .vsel_reg = MT6358_##vreg##_ANA_CON0, \ + .vsel_mask = GENMASK(3, 0), \ .enable_reg = enreg, \ .enable_mask = BIT(enbit), \ .min_uV = volt, \ + .uV_step = 10000, \ }, \ .status_reg = MT6358_LDO_##vreg##_CON1, \ .qi = BIT(15), \ @@ -476,8 +482,13 @@ static const struct regulator_ops mt6358_volt_table_ops = { .get_status = mt6358_get_status, }; +/* "Fixed" LDOs with output voltage calibration +0 ~ +100 mV */ static const struct regulator_ops mt6358_volt_fixed_ops = { .list_voltage = regulator_list_voltage_linear, + .map_voltage = regulator_map_voltage_linear, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = mt6358_get_buck_voltage_sel, + .set_voltage_time_sel = regulator_set_voltage_time_sel, .enable = regulator_enable_regmap, .disable = regulator_disable_regmap, .is_enabled =
regulator_is_enabled_regmap, diff --git a/include/linux/mfd/mt6358/registers.h b/include/linux/mfd/mt6358/registers.h index 3d33517f178c..5ea2590be710 100644 --- a/include/linux/mfd/mt6358/registers.h +++ b/include/linux/mfd/mt6358/registers.h @@ -262,6 +262,12 @@ #define MT6358_LDO_VBIF28_CON3 0x1db0 #define MT6358_VCAMA1_ANA_CON0 0x1e08 #define MT6358_VCAMA2_ANA_CON0 0x1e0c +#define MT6358_VFE28_ANA_CON0 0x1e10 +#define MT6358_VCN28_ANA_CON0 0x1e14 +#define MT6358_VBIF28_ANA_CON0 0x1e18 +#define MT6358_VAUD28_ANA_CON0 0x1e1c +#define MT6358_VAUX18_ANA_CON0 0x1e20 +#define MT6358_VXO22_ANA_CON0 0x1e24 #define MT6358_VCN33_ANA_CON0 0x1e28 #define MT6358_VSIM1_ANA_CON0 0x1e2c #define MT6358_VSIM2_ANA_CON0 0x1e30 -- cgit v1.2.3 From 581beb4fe37d17571c87058d13d298d5458e25e9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:02:58 +0100 Subject: iov_iter: Remove last_offset from iov_iter as it was for ITER_PIPE Now that ITER_PIPE has been removed, iov_iter::last_offset is no longer used, so remove it. 
Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-2-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- include/linux/uio.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 42bce38a8e87..2000e42a6586 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -44,10 +44,7 @@ struct iov_iter { bool nofault; bool data_source; bool user_backed; - union { - size_t iov_offset; - int last_offset; - }; + size_t iov_offset; /* * Hack alert: overlay ubuf_iovec with iovec + count, so * that the members resolve correctly regardless of the type -- cgit v1.2.3 From 7d9e44a6ad8a22135e6308d330da29271f41a98a Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:02 +0100 Subject: iov_iter: Renumber ITER_* constants Renumber the ITER_* iterator-type constants to put things in the same order as in the iteration functions and to group user-backed iterators at the bottom. 
Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-6-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- include/linux/uio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 2000e42a6586..bef8e56aa45c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -21,12 +21,12 @@ struct kvec { enum iter_type { /* iter types */ + ITER_UBUF, ITER_IOVEC, - ITER_KVEC, ITER_BVEC, + ITER_KVEC, ITER_XARRAY, ITER_DISCARD, - ITER_UBUF, }; #define ITER_SOURCE 1 // == WRITE -- cgit v1.2.3 From f1b4cb650b9a0eeba206d8f069fcdc532bfbcd74 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:03 +0100 Subject: iov_iter: Derive user-backedness from the iterator type Use the iterator type to determine whether an iterator is user-backed or not rather than using a special flag for it. Now that ITER_UBUF and ITER_IOVEC are 0 and 1, they can be checked with a single comparison. 
Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-7-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- include/linux/uio.h | 4 +--- lib/iov_iter.c | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index bef8e56aa45c..65d9143f83c8 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -43,7 +43,6 @@ struct iov_iter { bool copy_mc; bool nofault; bool data_source; - bool user_backed; size_t iov_offset; /* * Hack alert: overlay ubuf_iovec with iovec + count, so @@ -140,7 +139,7 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) static inline bool user_backed_iter(const struct iov_iter *i) { - return i->user_backed; + return iter_is_ubuf(i) || iter_is_iovec(i); } /* @@ -380,7 +379,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, *i = (struct iov_iter) { .iter_type = ITER_UBUF, .copy_mc = false, - .user_backed = true, .data_source = direction, .ubuf = buf, .count = count, diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 27234a820eeb..227c9f536b94 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -290,7 +290,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, .iter_type = ITER_IOVEC, .copy_mc = false, .nofault = false, - .user_backed = true, .data_source = direction, .__iov = iov, .nr_segs = nr_segs, -- cgit v1.2.3 From f1982740f5e77090bde41a9b84e257d69ec46598 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:04 +0100 Subject: iov_iter: Convert iterate*() to inline funcs Convert the iov_iter iteration macros to inline functions to make the code easier to follow. 
The functions are marked __always_inline as we don't want to end up with indirect calls in the code. This, however, leaves dealing with ->copy_mc in an awkward situation since the step function (memcpy_from_iter_mc()) needs to test the flag in the iterator, but isn't passed the iterator. This will be dealt with in a follow-up patch. The variable names in the per-type iterator functions have been harmonised as much as possible and made clearer as to the variable purpose. The iterator functions are also moved to a header file so that other operations that need to scan over an iterator can be added. For instance, the rbd driver could use this to scan a buffer to see if it is all zeros and libceph could use this to generate a crc. Signed-off-by: David Howells Link: https://lore.kernel.org/r/3710261.1691764329@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/855.1692047347@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/20230816120741.534415-1-dhowells@redhat.com/ # v3 Link: https://lore.kernel.org/r/20230925120309.1731676-8-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Christian Brauner --- include/linux/iov_iter.h | 274 +++++++++++++++++++++++++++++++ lib/iov_iter.c | 416 ++++++++++++++++++++--------------------------- 2 files changed, 449 insertions(+), 241 deletions(-) create mode 100644 include/linux/iov_iter.h (limited to 'include/linux') diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h new file mode 100644 index 000000000000..270454a6703d --- /dev/null +++ b/include/linux/iov_iter.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* I/O iterator iteration building functions. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_IOV_ITER_H +#define _LINUX_IOV_ITER_H + +#include +#include + +typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2); +typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len, + void *priv, void *priv2); + +/* + * Handle ITER_UBUF. + */ +static __always_inline +size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + void __user *base = iter->ubuf; + size_t progress = 0, remain; + + remain = step(base + iter->iov_offset, 0, len, priv, priv2); + progress = len - remain; + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_IOVEC. + */ +static __always_inline +size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f step) +{ + const struct iovec *p = iter->__iov; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->__iov; + iter->__iov = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_KVEC. 
+ */ +static __always_inline +size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct kvec *p = iter->kvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t part = min(len, p->iov_len - skip); + + if (likely(part)) { + remain = step(p->iov_base + skip, progress, part, priv, priv2); + consumed = part - remain; + progress += consumed; + skip += consumed; + len -= consumed; + if (skip < p->iov_len) + break; + } + p++; + skip = 0; + } while (len); + + iter->nr_segs -= p - iter->kvec; + iter->kvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_BVEC. + */ +static __always_inline +size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct bio_vec *p = iter->bvec; + size_t progress = 0, skip = iter->iov_offset; + + do { + size_t remain, consumed; + size_t offset = p->bv_offset + skip, part; + void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE); + + part = min3(len, + (size_t)(p->bv_len - skip), + (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); + remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2); + kunmap_local(kaddr); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + if (skip >= p->bv_len) { + skip = 0; + p++; + } + if (remain) + break; + } while (len); + + iter->nr_segs -= p - iter->bvec; + iter->bvec = p; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_XARRAY. 
+ */ +static __always_inline +size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + struct folio *folio; + size_t progress = 0; + loff_t start = iter->xarray_start + iter->iov_offset; + pgoff_t index = start / PAGE_SIZE; + XA_STATE(xas, iter->xarray, index); + + rcu_read_lock(); + xas_for_each(&xas, folio, ULONG_MAX) { + size_t remain, consumed, offset, part, flen; + + if (xas_retry(&xas, folio)) + continue; + if (WARN_ON(xa_is_value(folio))) + break; + if (WARN_ON(folio_test_hugetlb(folio))) + break; + + offset = offset_in_folio(folio, start + progress); + flen = min(folio_size(folio) - offset, len); + + while (flen) { + void *base = kmap_local_folio(folio, offset); + + part = min_t(size_t, flen, + PAGE_SIZE - offset_in_page(offset)); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + + consumed = part - remain; + progress += consumed; + len -= consumed; + + if (remain || len == 0) + goto out; + flen -= consumed; + offset += consumed; + } + } + +out: + rcu_read_unlock(); + iter->iov_offset += progress; + iter->count -= progress; + return progress; +} + +/* + * Handle ITER_DISCARD. + */ +static __always_inline +size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + size_t progress = len; + + iter->count -= progress; + return progress; +} + +/** + * iterate_and_advance2 - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @priv2: More data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * Iterate over the next part of an iterator, up to the specified length. The + * buffer is presented in segments, which for kernel iteration are broken up by + * physical pages and mapped, with the mapped address being presented. 
+ * + * Two step functions, @step and @ustep, must be provided, one for handling + * mapped kernel addresses and the other is given user addresses which have the + * potential to fault since no pinning is performed. + * + * The step functions are passed the address and length of the segment, @priv, + * @priv2 and the amount of data so far iterated over (which can, for example, + * be added to @priv to point to the right part of a second buffer). The step + * functions should return the amount of the segment they didn't process (ie. 0 + * indicates complete processing). + * + * This function returns the amount of data processed (ie. 0 means nothing was + * processed and the value of @len means processed to completion). + */ +static __always_inline +size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, + void *priv2, iov_ustep_f ustep, iov_step_f step) +{ + if (unlikely(iter->count < len)) + len = iter->count; + if (unlikely(!len)) + return 0; + + if (likely(iter_is_ubuf(iter))) + return iterate_ubuf(iter, len, priv, priv2, ustep); + if (likely(iter_is_iovec(iter))) + return iterate_iovec(iter, len, priv, priv2, ustep); + if (iov_iter_is_bvec(iter)) + return iterate_bvec(iter, len, priv, priv2, step); + if (iov_iter_is_kvec(iter)) + return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_xarray(iter)) + return iterate_xarray(iter, len, priv, priv2, step); + return iterate_discard(iter, len, priv, priv2, step); +} + +/** + * iterate_and_advance - Iterate over an iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @ustep: Function for UBUF/IOVEC iterators; given __user addresses. + * @step: Function for other iterators; given kernel addresses. + * + * As iterate_and_advance2(), but priv2 is always NULL.
+ */ +static __always_inline +size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, + iov_ustep_f ustep, iov_step_f step) +{ + return iterate_and_advance2(iter, len, priv, NULL, ustep, step); +} + +#endif /* _LINUX_IOV_ITER_H */ diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 227c9f536b94..65374ee91ecd 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -13,189 +13,69 @@ #include #include #include +#include -/* covers ubuf and kbuf alike */ -#define iterate_buf(i, n, base, len, off, __p, STEP) { \ - size_t __maybe_unused off = 0; \ - len = n; \ - base = __p + i->iov_offset; \ - len -= (STEP); \ - i->iov_offset += len; \ - n = len; \ -} - -/* covers iovec and kvec alike */ -#define iterate_iovec(i, n, base, len, off, __p, STEP) { \ - size_t off = 0; \ - size_t skip = i->iov_offset; \ - do { \ - len = min(n, __p->iov_len - skip); \ - if (likely(len)) { \ - base = __p->iov_base + skip; \ - len -= (STEP); \ - off += len; \ - skip += len; \ - n -= len; \ - if (skip < __p->iov_len) \ - break; \ - } \ - __p++; \ - skip = 0; \ - } while (n); \ - i->iov_offset = skip; \ - n = off; \ -} - -#define iterate_bvec(i, n, base, len, off, p, STEP) { \ - size_t off = 0; \ - unsigned skip = i->iov_offset; \ - while (n) { \ - unsigned offset = p->bv_offset + skip; \ - unsigned left; \ - void *kaddr = kmap_local_page(p->bv_page + \ - offset / PAGE_SIZE); \ - base = kaddr + offset % PAGE_SIZE; \ - len = min(min(n, (size_t)(p->bv_len - skip)), \ - (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \ - left = (STEP); \ - kunmap_local(kaddr); \ - len -= left; \ - off += len; \ - skip += len; \ - if (skip == p->bv_len) { \ - skip = 0; \ - p++; \ - } \ - n -= len; \ - if (left) \ - break; \ - } \ - i->iov_offset = skip; \ - n = off; \ -} - -#define iterate_xarray(i, n, base, len, __off, STEP) { \ - __label__ __out; \ - size_t __off = 0; \ - struct folio *folio; \ - loff_t start = i->xarray_start + i->iov_offset; \ - pgoff_t index = start / PAGE_SIZE; \ - XA_STATE(xas, 
i->xarray, index); \ - \ - len = PAGE_SIZE - offset_in_page(start); \ - rcu_read_lock(); \ - xas_for_each(&xas, folio, ULONG_MAX) { \ - unsigned left; \ - size_t offset; \ - if (xas_retry(&xas, folio)) \ - continue; \ - if (WARN_ON(xa_is_value(folio))) \ - break; \ - if (WARN_ON(folio_test_hugetlb(folio))) \ - break; \ - offset = offset_in_folio(folio, start + __off); \ - while (offset < folio_size(folio)) { \ - base = kmap_local_folio(folio, offset); \ - len = min(n, len); \ - left = (STEP); \ - kunmap_local(base); \ - len -= left; \ - __off += len; \ - n -= len; \ - if (left || n == 0) \ - goto __out; \ - offset += len; \ - len = PAGE_SIZE; \ - } \ - } \ -__out: \ - rcu_read_unlock(); \ - i->iov_offset += __off; \ - n = __off; \ -} - -#define __iterate_and_advance(i, n, base, len, off, I, K) { \ - if (unlikely(i->count < n)) \ - n = i->count; \ - if (likely(n)) { \ - if (likely(iter_is_ubuf(i))) { \ - void __user *base; \ - size_t len; \ - iterate_buf(i, n, base, len, off, \ - i->ubuf, (I)) \ - } else if (likely(iter_is_iovec(i))) { \ - const struct iovec *iov = iter_iov(i); \ - void __user *base; \ - size_t len; \ - iterate_iovec(i, n, base, len, off, \ - iov, (I)) \ - i->nr_segs -= iov - iter_iov(i); \ - i->__iov = iov; \ - } else if (iov_iter_is_bvec(i)) { \ - const struct bio_vec *bvec = i->bvec; \ - void *base; \ - size_t len; \ - iterate_bvec(i, n, base, len, off, \ - bvec, (K)) \ - i->nr_segs -= bvec - i->bvec; \ - i->bvec = bvec; \ - } else if (iov_iter_is_kvec(i)) { \ - const struct kvec *kvec = i->kvec; \ - void *base; \ - size_t len; \ - iterate_iovec(i, n, base, len, off, \ - kvec, (K)) \ - i->nr_segs -= kvec - i->kvec; \ - i->kvec = kvec; \ - } else if (iov_iter_is_xarray(i)) { \ - void *base; \ - size_t len; \ - iterate_xarray(i, n, base, len, off, \ - (K)) \ - } \ - i->count -= n; \ - } \ -} -#define iterate_and_advance(i, n, base, len, off, I, K) \ - __iterate_and_advance(i, n, base, len, off, I, ((void)(K),0)) - -static int copyout(void __user 
*to, const void *from, size_t n) +static __always_inline +size_t copy_to_user_iter(void __user *iter_to, size_t progress, + size_t len, void *from, void *priv2) { if (should_fail_usercopy()) - return n; - if (access_ok(to, n)) { - instrument_copy_to_user(to, from, n); - n = raw_copy_to_user(to, from, n); + return len; + if (access_ok(iter_to, len)) { + from += progress; + instrument_copy_to_user(iter_to, from, len); + len = raw_copy_to_user(iter_to, from, len); } - return n; + return len; } -static int copyout_nofault(void __user *to, const void *from, size_t n) +static __always_inline +size_t copy_to_user_iter_nofault(void __user *iter_to, size_t progress, + size_t len, void *from, void *priv2) { - long res; + ssize_t res; if (should_fail_usercopy()) - return n; - - res = copy_to_user_nofault(to, from, n); + return len; - return res < 0 ? n : res; + from += progress; + res = copy_to_user_nofault(iter_to, from, len); + return res < 0 ? len : res; } -static int copyin(void *to, const void __user *from, size_t n) +static __always_inline +size_t copy_from_user_iter(void __user *iter_from, size_t progress, + size_t len, void *to, void *priv2) { - size_t res = n; + size_t res = len; if (should_fail_usercopy()) - return n; - if (access_ok(from, n)) { - instrument_copy_from_user_before(to, from, n); - res = raw_copy_from_user(to, from, n); - instrument_copy_from_user_after(to, from, n, res); + return len; + if (access_ok(iter_from, len)) { + to += progress; + instrument_copy_from_user_before(to, iter_from, len); + res = raw_copy_from_user(to, iter_from, len); + instrument_copy_from_user_after(to, iter_from, len, res); } return res; } +static __always_inline +size_t memcpy_to_iter(void *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + memcpy(iter_to, from + progress, len); + return 0; +} + +static __always_inline +size_t memcpy_from_iter(void *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + memcpy(to + progress, iter_from, len); + 
return 0; +} + /* * fault_in_iov_iter_readable - fault in iov iterator for reading * @i: iterator @@ -312,23 +192,29 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) return 0; if (user_backed_iter(i)) might_fault(); - iterate_and_advance(i, bytes, base, len, off, - copyout(base, addr + off, len), - memcpy(base, addr + off, len) - ) - - return bytes; + return iterate_and_advance(i, bytes, (void *)addr, + copy_to_user_iter, memcpy_to_iter); } EXPORT_SYMBOL(_copy_to_iter); #ifdef CONFIG_ARCH_HAS_COPY_MC -static int copyout_mc(void __user *to, const void *from, size_t n) -{ - if (access_ok(to, n)) { - instrument_copy_to_user(to, from, n); - n = copy_mc_to_user((__force void *) to, from, n); +static __always_inline +size_t copy_to_user_iter_mc(void __user *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + if (access_ok(iter_to, len)) { + from += progress; + instrument_copy_to_user(iter_to, from, len); + len = copy_mc_to_user(iter_to, from, len); } - return n; + return len; +} + +static __always_inline +size_t memcpy_to_iter_mc(void *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + return copy_mc_to_kernel(iter_to, from + progress, len); } /** @@ -361,22 +247,20 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) return 0; if (user_backed_iter(i)) might_fault(); - __iterate_and_advance(i, bytes, base, len, off, - copyout_mc(base, addr + off, len), - copy_mc_to_kernel(base, addr + off, len) - ) - - return bytes; + return iterate_and_advance(i, bytes, (void *)addr, + copy_to_user_iter_mc, memcpy_to_iter_mc); } EXPORT_SYMBOL_GPL(_copy_mc_to_iter); #endif /* CONFIG_ARCH_HAS_COPY_MC */ -static void *memcpy_from_iter(struct iov_iter *i, void *to, const void *from, - size_t size) +static size_t memcpy_from_iter_mc(void *iter_from, size_t progress, + size_t len, void *to, void *priv2) { - if (iov_iter_is_copy_mc(i)) - return (void *)copy_mc_to_kernel(to, from, size); - return 
memcpy(to, from, size); + struct iov_iter *iter = priv2; + + if (iov_iter_is_copy_mc(iter)) + return copy_mc_to_kernel(to + progress, iter_from, len); + return memcpy_from_iter(iter_from, progress, len, to, priv2); } size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) @@ -386,30 +270,46 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) if (user_backed_iter(i)) might_fault(); - iterate_and_advance(i, bytes, base, len, off, - copyin(addr + off, base, len), - memcpy_from_iter(i, addr + off, base, len) - ) - - return bytes; + return iterate_and_advance2(i, bytes, addr, i, + copy_from_user_iter, + memcpy_from_iter_mc); } EXPORT_SYMBOL(_copy_from_iter); +static __always_inline +size_t copy_from_user_iter_nocache(void __user *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + return __copy_from_user_inatomic_nocache(to + progress, iter_from, len); +} + size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(!i->data_source)) return 0; - iterate_and_advance(i, bytes, base, len, off, - __copy_from_user_inatomic_nocache(addr + off, base, len), - memcpy(addr + off, base, len) - ) - - return bytes; + return iterate_and_advance(i, bytes, addr, + copy_from_user_iter_nocache, + memcpy_from_iter); } EXPORT_SYMBOL(_copy_from_iter_nocache); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +static __always_inline +size_t copy_from_user_iter_flushcache(void __user *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + return __copy_from_user_flushcache(to + progress, iter_from, len); +} + +static __always_inline +size_t memcpy_from_iter_flushcache(void *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + memcpy_flushcache(to + progress, iter_from, len); + return 0; +} + /** * _copy_from_iter_flushcache - write destination through cpu cache * @addr: destination kernel address @@ -431,12 +331,9 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct 
iov_iter *i) if (WARN_ON_ONCE(!i->data_source)) return 0; - iterate_and_advance(i, bytes, base, len, off, - __copy_from_user_flushcache(addr + off, base, len), - memcpy_flushcache(addr + off, base, len) - ) - - return bytes; + return iterate_and_advance(i, bytes, addr, + copy_from_user_iter_flushcache, + memcpy_from_iter_flushcache); } EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache); #endif @@ -508,10 +405,9 @@ size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t byte void *kaddr = kmap_local_page(page); size_t n = min(bytes, (size_t)PAGE_SIZE - offset); - iterate_and_advance(i, n, base, len, off, - copyout_nofault(base, kaddr + offset + off, len), - memcpy(base, kaddr + offset + off, len) - ) + n = iterate_and_advance(i, n, kaddr + offset, + copy_to_user_iter_nofault, + memcpy_to_iter); kunmap_local(kaddr); res += n; bytes -= n; @@ -554,14 +450,25 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, } EXPORT_SYMBOL(copy_page_from_iter); -size_t iov_iter_zero(size_t bytes, struct iov_iter *i) +static __always_inline +size_t zero_to_user_iter(void __user *iter_to, size_t progress, + size_t len, void *priv, void *priv2) { - iterate_and_advance(i, bytes, base, len, count, - clear_user(base, len), - memset(base, 0, len) - ) + return clear_user(iter_to, len); +} - return bytes; +static __always_inline +size_t zero_to_iter(void *iter_to, size_t progress, + size_t len, void *priv, void *priv2) +{ + memset(iter_to, 0, len); + return 0; +} + +size_t iov_iter_zero(size_t bytes, struct iov_iter *i) +{ + return iterate_and_advance(i, bytes, NULL, + zero_to_user_iter, zero_to_iter); } EXPORT_SYMBOL(iov_iter_zero); @@ -586,10 +493,9 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, } p = kmap_atomic(page) + offset; - iterate_and_advance(i, n, base, len, off, - copyin(p + off, base, len), - memcpy_from_iter(i, p + off, base, len) - ) + n = iterate_and_advance2(i, n, p, i, + copy_from_user_iter, + memcpy_from_iter_mc);
kunmap_atomic(p); copied += n; offset += n; @@ -1180,32 +1086,64 @@ ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, } EXPORT_SYMBOL(iov_iter_get_pages_alloc2); +static __always_inline +size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + __wsum next, *csum = priv2; + + next = csum_and_copy_from_user(iter_from, to + progress, len); + *csum = csum_block_add(*csum, next, progress); + return next ? 0 : len; +} + +static __always_inline +size_t memcpy_from_iter_csum(void *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + __wsum *csum = priv2; + + *csum = csum_and_memcpy(to + progress, iter_from, len, *csum, progress); + return 0; +} + size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) { - __wsum sum, next; - sum = *csum; if (WARN_ON_ONCE(!i->data_source)) return 0; - - iterate_and_advance(i, bytes, base, len, off, ({ - next = csum_and_copy_from_user(base, addr + off, len); - sum = csum_block_add(sum, next, off); - next ? 0 : len; - }), ({ - sum = csum_and_memcpy(addr + off, base, len, sum, off); - }) - ) - *csum = sum; - return bytes; + return iterate_and_advance2(i, bytes, addr, csum, + copy_from_user_iter_csum, + memcpy_from_iter_csum); } EXPORT_SYMBOL(csum_and_copy_from_iter); +static __always_inline +size_t copy_to_user_iter_csum(void __user *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + __wsum next, *csum = priv2; + + next = csum_and_copy_to_user(from + progress, iter_to, len); + *csum = csum_block_add(*csum, next, progress); + return next ? 
0 : len; +} + +static __always_inline +size_t memcpy_to_iter_csum(void *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + __wsum *csum = priv2; + + *csum = csum_and_memcpy(iter_to, from + progress, len, *csum, progress); + return 0; +} + size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, struct iov_iter *i) { struct csum_state *csstate = _csstate; - __wsum sum, next; + __wsum sum; if (WARN_ON_ONCE(i->data_source)) return 0; @@ -1219,14 +1157,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, } sum = csum_shift(csstate->csum, csstate->off); - iterate_and_advance(i, bytes, base, len, off, ({ - next = csum_and_copy_to_user(addr + off, base, len); - sum = csum_block_add(sum, next, off); - next ? 0 : len; - }), ({ - sum = csum_and_memcpy(base, addr + off, len, sum, off); - }) - ) + + bytes = iterate_and_advance2(i, bytes, (void *)addr, &sum, + copy_to_user_iter_csum, + memcpy_to_iter_csum); csstate->csum = csum_shift(sum, csstate->off); csstate->off += bytes; return bytes; -- cgit v1.2.3 From 3acf8ace68230e9558cf916847f1cc9f208abdf1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:39 +0200 Subject: bpf: Add missed value to kprobe perf link info Add missed value to kprobe attached through perf link info to hold the stats of missed kprobe handler execution. The kprobe's missed counter gets incremented when kprobe handler is not executed due to another kprobe running on the same cpu. 
Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230920213145.1941596-4-jolsa@kernel.org --- include/linux/trace_events.h | 6 ++++-- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 14 ++++++++------ kernel/trace/bpf_trace.c | 5 +++-- kernel/trace/trace_kprobe.c | 14 +++++++++++--- tools/include/uapi/linux/bpf.h | 1 + 6 files changed, 28 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21ae37e49319..5eb88a66eb68 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -761,7 +761,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -801,7 +802,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -877,6 +878,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1da5b1bcce71..70bfa997e896 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6548,6 +6548,7 @@ struct bpf_link_info { __u32 
name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 85c1d908f70f..6b5280f14a53 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3374,7 +3374,7 @@ static void bpf_perf_link_dealloc(struct bpf_link *link) static int bpf_perf_link_fill_common(const struct perf_event *event, char __user *uname, u32 ulen, u64 *probe_offset, u64 *probe_addr, - u32 *fd_type) + u32 *fd_type, unsigned long *missed) { const char *buf; u32 prog_id; @@ -3385,7 +3385,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, return -EINVAL; err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, - probe_offset, probe_addr); + probe_offset, probe_addr, missed); if (err) return err; if (!uname) @@ -3408,6 +3408,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, static int bpf_perf_link_fill_kprobe(const struct perf_event *event, struct bpf_link_info *info) { + unsigned long missed; char __user *uname; u64 addr, offset; u32 ulen, type; @@ -3416,7 +3417,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); ulen = info->perf_event.kprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, &missed); if (err) return err; if (type == BPF_FD_TYPE_KRETPROBE) @@ -3425,6 +3426,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_KPROBE; info->perf_event.kprobe.offset = offset; + info->perf_event.kprobe.missed = missed; if (!kallsyms_show_value(current_cred())) addr = 0; info->perf_event.kprobe.addr = addr; @@ -3444,7 +3446,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = 
info->perf_event.uprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, NULL); if (err) return err; @@ -3480,7 +3482,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; - return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL); + return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL); } static int bpf_perf_link_fill_perf_event(const struct perf_event *event, @@ -4813,7 +4815,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, err = bpf_get_perf_event_info(event, &prog_id, &fd_type, &buf, &probe_offset, - &probe_addr); + &probe_addr, NULL); if (!err) err = bpf_task_fd_query_copy(attr, uattr, prog_id, fd_type, buf, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6aec6e7d612a..f6a7d2524949 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2384,7 +2384,8 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr) + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed) { bool is_tracepoint, is_syscall_tp; struct bpf_prog *prog; @@ -2419,7 +2420,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, #ifdef CONFIG_KPROBE_EVENTS if (flags & TRACE_EVENT_FL_KPROBE) err = bpf_get_kprobe_info(event, fd_type, buf, - probe_offset, probe_addr, + probe_offset, probe_addr, missed, event->attr.type == PERF_TYPE_TRACEPOINT); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3d7a180a8427..961a78ffd6d2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1189,6 +1189,12 @@ static const 
struct file_operations kprobe_events_ops = { .write = probes_write, }; +static unsigned long trace_kprobe_missed(struct trace_kprobe *tk) +{ + return trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; +} + /* Probes profiling interfaces */ static int probes_profile_seq_show(struct seq_file *m, void *v) { @@ -1200,8 +1206,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) return 0; tk = to_trace_kprobe(ev); - nmissed = trace_kprobe_is_return(tk) ? - tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; + nmissed = trace_kprobe_missed(tk); seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), @@ -1547,7 +1552,8 @@ NOKPROBE_SYMBOL(kretprobe_perf_func); int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, - u64 *probe_addr, bool perf_type_tracepoint) + u64 *probe_addr, unsigned long *missed, + bool perf_type_tracepoint) { const char *pevent = trace_event_name(event->tp_event); const char *group = event->tp_event->class->system; @@ -1566,6 +1572,8 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, *probe_addr = kallsyms_show_value(current_cred()) ? 
(unsigned long)tk->rp.kp.addr : 0; *symbol = tk->symbol; + if (missed) + *missed = trace_kprobe_missed(tk); return 0; } #endif /* CONFIG_PERF_EVENTS */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1da5b1bcce71..70bfa997e896 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6548,6 +6548,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ -- cgit v1.2.3 From dd8657894c11b03c6eb0fd53fe9d7fec2072d18b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:40 +0200 Subject: bpf: Count missed stats in trace_call_bpf Increase misses stats in case bpf array execution is skipped because of recursion check in trace_call_bpf. Adding bpf_prog_inc_misses_counters that increase misses counts for all bpf programs in bpf_prog_array. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Tested-by: Song Liu Reviewed-by: Song Liu Link: https://lore.kernel.org/bpf/20230920213145.1941596-5-jolsa@kernel.org --- include/linux/bpf.h | 16 ++++++++++++++++ kernel/trace/bpf_trace.c | 3 +++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30063a760b5a..a82efd34b741 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2922,6 +2922,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +static __always_inline void +bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) +{ + const struct bpf_prog_array_item *item; + struct bpf_prog *prog; + + if (unlikely(!array)) + return; + + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + bpf_prog_inc_misses_counter(prog); + item++; + } +} + #if defined(CONFIG_INET) && 
defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f6a7d2524949..df697c74d519 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -117,6 +117,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) * and don't send kprobe event into ring-buffer, * so return zero here */ + rcu_read_lock(); + bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array)); + rcu_read_unlock(); ret = 0; goto out; } -- cgit v1.2.3 From 7a0263dc904f3467f474e4088ae092eda9a5a99b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2023 22:54:50 -0700 Subject: fscrypt: replace get_ino_and_lblk_bits with just has_32bit_inodes Now that fs/crypto/ computes the filesystem's lblk_bits from its maximum file size, it is no longer necessary for filesystems to provide lblk_bits via fscrypt_operations::get_ino_and_lblk_bits. It is still necessary for fs/crypto/ to retrieve ino_bits from the filesystem. However, this is used only to decide whether inode numbers fit in 32 bits. Also, ino_bits is static for all relevant filesystems, i.e. it doesn't depend on the filesystem instance. Therefore, in the interest of keeping things as simple as possible, replace 'get_ino_and_lblk_bits' with a flag 'has_32bit_inodes'. This can always be changed back to a function if a filesystem needs it to be dynamic, but for now a static flag is all that's needed. 
Link: https://lore.kernel.org/r/20230925055451.59499-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/policy.c | 33 +++++++++++++++------------------ fs/ext4/crypto.c | 9 +-------- fs/f2fs/super.c | 9 +-------- include/linux/fscrypt.h | 26 +++++++++++--------------- 4 files changed, 28 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 7b34949e49de..32709dad9762 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -118,11 +118,11 @@ static bool supported_direct_key_modes(const struct inode *inode, } static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy, - const struct inode *inode, - const char *type, int max_ino_bits) + const struct inode *inode) { + const char *type = (policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) + ? "IV_INO_LBLK_64" : "IV_INO_LBLK_32"; struct super_block *sb = inode->i_sb; - int ino_bits = 64, lblk_bits = 64; /* * IV_INO_LBLK_* exist only because of hardware limitations, and @@ -149,9 +149,15 @@ static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy, type, sb->s_id); return false; } - if (sb->s_cop->get_ino_and_lblk_bits) - sb->s_cop->get_ino_and_lblk_bits(sb, &ino_bits, &lblk_bits); - if (ino_bits > max_ino_bits) { + + /* + * IV_INO_LBLK_64 and IV_INO_LBLK_32 both require that inode numbers fit + * in 32 bits. In principle, IV_INO_LBLK_32 could support longer inode + * numbers because it hashes the inode number; however, currently the + * inode number is gotten from inode::i_ino which is 'unsigned long'. + * So for now the implementation limit is 32 bits. 
+ */ + if (!sb->s_cop->has_32bit_inodes) { fscrypt_warn(inode, "Can't use %s policy on filesystem '%s' because its inode numbers are too long", type, sb->s_id); @@ -242,18 +248,9 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, policy->filenames_encryption_mode)) return false; - if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && - !supported_iv_ino_lblk_policy(policy, inode, "IV_INO_LBLK_64", 32)) - return false; - - /* - * IV_INO_LBLK_32 hashes the inode number, so in principle it can - * support any ino_bits. However, currently the inode number is gotten - * from inode::i_ino which is 'unsigned long'. So for now the - * implementation limit is 32 bits. - */ - if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) && - !supported_iv_ino_lblk_policy(policy, inode, "IV_INO_LBLK_32", 32)) + if ((policy->flags & (FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 | + FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) && + !supported_iv_ino_lblk_policy(policy, inode)) return false; if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) { diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 5cd7bcfae46b..9e36731701ba 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -232,20 +232,13 @@ static bool ext4_has_stable_inodes(struct super_block *sb) return ext4_has_feature_stable_inodes(sb); } -static void ext4_get_ino_and_lblk_bits(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret) -{ - *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count); - *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); -} - const struct fscrypt_operations ext4_cryptops = { .needs_bounce_pages = 1, + .has_32bit_inodes = 1, .legacy_key_prefix = "ext4:", .get_context = ext4_get_context, .set_context = ext4_set_context, .get_dummy_policy = ext4_get_dummy_policy, .empty_dir = ext4_empty_dir, .has_stable_inodes = ext4_has_stable_inodes, - .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, }; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 
55aa0ed531f2..c44915713264 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3203,13 +3203,6 @@ static bool f2fs_has_stable_inodes(struct super_block *sb) return true; } -static void f2fs_get_ino_and_lblk_bits(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret) -{ - *ino_bits_ret = 8 * sizeof(nid_t); - *lblk_bits_ret = 8 * sizeof(block_t); -} - static struct block_device **f2fs_get_devices(struct super_block *sb, unsigned int *num_devs) { @@ -3232,13 +3225,13 @@ static struct block_device **f2fs_get_devices(struct super_block *sb, static const struct fscrypt_operations f2fs_cryptops = { .needs_bounce_pages = 1, + .has_32bit_inodes = 1, .legacy_key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, .get_dummy_policy = f2fs_get_dummy_policy, .empty_dir = f2fs_empty_dir, .has_stable_inodes = f2fs_has_stable_inodes, - .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, .get_devices = f2fs_get_devices, }; #endif diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 4505078e89b7..09a3cacbf62a 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -74,6 +74,17 @@ struct fscrypt_operations { */ unsigned int needs_bounce_pages : 1; + /* + * If set, then fs/crypto/ will allow the use of encryption settings + * that assume inode numbers fit in 32 bits (i.e. + * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64}), provided that the other + * prerequisites for these settings are also met. This is only useful + * if the filesystem wants to support inline encryption hardware that is + * limited to 32-bit or 64-bit data unit numbers and where programming + * keyslots is very slow. + */ + unsigned int has_32bit_inodes : 1; + /* * This field exists only for backwards compatibility reasons and should * only be set by the filesystems that are setting it already. 
It @@ -151,21 +162,6 @@ struct fscrypt_operations { */ bool (*has_stable_inodes)(struct super_block *sb); - /* - * Get the number of bits that the filesystem uses to represent inode - * numbers and file logical block numbers. - * - * By default, both of these are assumed to be 64-bit. This function - * can be implemented to declare that either or both of these numbers is - * shorter, which may allow the use of the - * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags and/or the use of - * inline crypto hardware whose maximum DUN length is less than 64 bits - * (e.g., eMMC v5.2 spec compliant hardware). This function only needs - * to be implemented if support for one of these features is needed. - */ - void (*get_ino_and_lblk_bits)(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret); - /* * Return an array of pointers to the block devices to which the * filesystem may write encrypted file contents, NULL if the filesystem -- cgit v1.2.3 From 5b11888471806edf699316d4dcb9b426caebbef2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 24 Sep 2023 22:54:51 -0700 Subject: fscrypt: support crypto data unit size less than filesystem block size Until now, fscrypt has always used the filesystem block size as the granularity of file contents encryption. Two scenarios have come up where a sub-block granularity of contents encryption would be useful: 1. Inline crypto hardware that only supports a crypto data unit size that is less than the filesystem block size. 2. Support for direct I/O at a granularity less than the filesystem block size, for example at the block device's logical block size in order to match the traditional direct I/O alignment requirement. (1) first came up with older eMMC inline crypto hardware that only supports a crypto data unit size of 512 bytes. That specific case ultimately went away because all systems with that hardware continued using out of tree code and never actually upgraded to the upstream inline crypto framework. 
But, now it's coming back in a new way: some current UFS controllers only support a data unit size of 4096 bytes, and there is a proposal to increase the filesystem block size to 16K. (2) was discussed as a "nice to have" feature, though not essential, when support for direct I/O on encrypted files was being upstreamed. Still, the fact that this feature has come up several times does suggest it would be wise to have available. Therefore, this patch implements it by using one of the reserved bytes in fscrypt_policy_v2 to allow users to select a sub-block data unit size. Supported data unit sizes are powers of 2 between 512 and the filesystem block size, inclusively. Support is implemented for both the FS-layer and inline crypto cases. This patch focuses on the basic support for sub-block data units. Some things are out of scope for this patch but may be addressed later: - Supporting sub-block data units in combination with FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64, in most cases. Unfortunately this combination usually causes data unit indices to exceed 32 bits, and thus fscrypt_supported_policy() correctly disallows it. The users who potentially need this combination are using f2fs. To support it, f2fs would need to provide an option to slightly reduce its max file size. - Supporting sub-block data units in combination with FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32. This has the same problem described above, but also it will need special code to make DUN wraparound still happen on a FS block boundary. - Supporting use case (2) mentioned above. The encrypted direct I/O code will need to stop requiring and assuming FS block alignment. This won't be hard, but it belongs in a separate patch. - Supporting this feature on filesystems other than ext4 and f2fs. (Filesystems declare support for it via their fscrypt_operations.) On UBIFS, sub-block data units don't make sense because UBIFS encrypts variable-length blocks as a result of compression. 
CephFS could support it, but a bit more work would be needed to make the fscrypt_*_block_inplace functions play nicely with sub-block data units. I don't think there's a use case for this on CephFS anyway. Link: https://lore.kernel.org/r/20230925055451.59499-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- Documentation/filesystems/fscrypt.rst | 117 ++++++++++++++++++++-------- fs/crypto/bio.c | 39 +++++----- fs/crypto/crypto.c | 139 ++++++++++++++++++---------------- fs/crypto/fscrypt_private.h | 56 +++++++++++--- fs/crypto/inline_crypt.c | 14 ++-- fs/crypto/keysetup.c | 5 ++ fs/crypto/policy.c | 34 ++++++++- fs/ext4/crypto.c | 1 + fs/f2fs/super.c | 1 + include/linux/fscrypt.h | 12 +++ include/uapi/linux/fscrypt.h | 3 +- 11 files changed, 288 insertions(+), 133 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index a624e92f2687..28700fb41a00 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -261,9 +261,9 @@ DIRECT_KEY policies The Adiantum encryption mode (see `Encryption modes and usage`_) is suitable for both contents and filenames encryption, and it accepts -long IVs --- long enough to hold both an 8-byte logical block number -and a 16-byte per-file nonce. Also, the overhead of each Adiantum key -is greater than that of an AES-256-XTS key. +long IVs --- long enough to hold both an 8-byte data unit index and a +16-byte per-file nonce. Also, the overhead of each Adiantum key is +greater than that of an AES-256-XTS key. Therefore, to improve performance and save memory, for Adiantum a "direct key" configuration is supported. 
When the user has enabled @@ -300,8 +300,8 @@ IV_INO_LBLK_32 policies IV_INO_LBLK_32 policies work like IV_INO_LBLK_64, except that for IV_INO_LBLK_32, the inode number is hashed with SipHash-2-4 (where the -SipHash key is derived from the master key) and added to the file -logical block number mod 2^32 to produce a 32-bit IV. +SipHash key is derived from the master key) and added to the file data +unit index mod 2^32 to produce a 32-bit IV. This format is optimized for use with inline encryption hardware compliant with the eMMC v5.2 standard, which supports only 32 IV bits @@ -451,31 +451,62 @@ acceleration is recommended: Contents encryption ------------------- -For file contents, each filesystem block is encrypted independently. -Starting from Linux kernel 5.5, encryption of filesystems with block -size less than system's page size is supported. - -Each block's IV is set to the logical block number within the file as -a little endian number, except that: - -- With CBC mode encryption, ESSIV is also used. Specifically, each IV - is encrypted with AES-256 where the AES-256 key is the SHA-256 hash - of the file's data encryption key. - -- With `DIRECT_KEY policies`_, the file's nonce is appended to the IV. - Currently this is only allowed with the Adiantum encryption mode. - -- With `IV_INO_LBLK_64 policies`_, the logical block number is limited - to 32 bits and is placed in bits 0-31 of the IV. The inode number - (which is also limited to 32 bits) is placed in bits 32-63. - -- With `IV_INO_LBLK_32 policies`_, the logical block number is limited - to 32 bits and is placed in bits 0-31 of the IV. The inode number - is then hashed and added mod 2^32. - -Note that because file logical block numbers are included in the IVs, -filesystems must enforce that blocks are never shifted around within -encrypted files, e.g. via "collapse range" or "insert range". +For contents encryption, each file's contents is divided into "data +units". 
Each data unit is encrypted independently. The IV for each +data unit incorporates the zero-based index of the data unit within +the file. This ensures that each data unit within a file is encrypted +differently, which is essential to prevent leaking information. + +Note: the encryption depending on the offset into the file means that +operations like "collapse range" and "insert range" that rearrange the +extent mapping of files are not supported on encrypted files. + +There are two cases for the sizes of the data units: + +* Fixed-size data units. This is how all filesystems other than UBIFS + work. A file's data units are all the same size; the last data unit + is zero-padded if needed. By default, the data unit size is equal + to the filesystem block size. On some filesystems, users can select + a sub-block data unit size via the ``log2_data_unit_size`` field of + the encryption policy; see `FS_IOC_SET_ENCRYPTION_POLICY`_. + +* Variable-size data units. This is what UBIFS does. Each "UBIFS + data node" is treated as a crypto data unit. Each contains variable + length, possibly compressed data, zero-padded to the next 16-byte + boundary. Users cannot select a sub-block data unit size on UBIFS. + +In the case of compression + encryption, the compressed data is +encrypted. UBIFS compression works as described above. f2fs +compression works a bit differently; it compresses a number of +filesystem blocks into a smaller number of filesystem blocks. +Therefore a f2fs-compressed file still uses fixed-size data units, and +it is encrypted in a similar way to a file containing holes. + +As mentioned in `Key hierarchy`_, the default encryption setting uses +per-file keys. In this case, the IV for each data unit is simply the +index of the data unit in the file. However, users can select an +encryption setting that does not use per-file keys. 
For these, some +kind of file identifier is incorporated into the IVs as follows: + +- With `DIRECT_KEY policies`_, the data unit index is placed in bits + 0-63 of the IV, and the file's nonce is placed in bits 64-191. + +- With `IV_INO_LBLK_64 policies`_, the data unit index is placed in + bits 0-31 of the IV, and the file's inode number is placed in bits + 32-63. This setting is only allowed when data unit indices and + inode numbers fit in 32 bits. + +- With `IV_INO_LBLK_32 policies`_, the file's inode number is hashed + and added to the data unit index. The resulting value is truncated + to 32 bits and placed in bits 0-31 of the IV. This setting is only + allowed when data unit indices and inode numbers fit in 32 bits. + +The byte order of the IV is always little endian. + +If the user selects FSCRYPT_MODE_AES_128_CBC for the contents mode, an +ESSIV layer is automatically included. In this case, before the IV is +passed to AES-128-CBC, it is encrypted with AES-256 where the AES-256 +key is the SHA-256 hash of the file's contents encryption key. Filenames encryption -------------------- @@ -544,7 +575,8 @@ follows:: __u8 contents_encryption_mode; __u8 filenames_encryption_mode; __u8 flags; - __u8 __reserved[4]; + __u8 log2_data_unit_size; + __u8 __reserved[3]; __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; }; @@ -586,6 +618,29 @@ This structure must be initialized as follows: The DIRECT_KEY, IV_INO_LBLK_64, and IV_INO_LBLK_32 flags are mutually exclusive. +- ``log2_data_unit_size`` is the log2 of the data unit size in bytes, + or 0 to select the default data unit size. The data unit size is + the granularity of file contents encryption. For example, setting + ``log2_data_unit_size`` to 12 causes file contents be passed to the + underlying encryption algorithm (such as AES-256-XTS) in 4096-byte + data units, each with its own IV. + + Not all filesystems support setting ``log2_data_unit_size``. ext4 + and f2fs support it since Linux v6.7. 
On filesystems that support + it, the supported nonzero values are 9 through the log2 of the + filesystem block size, inclusively. The default value of 0 selects + the filesystem block size. + + The main use case for ``log2_data_unit_size`` is for selecting a + data unit size smaller than the filesystem block size for + compatibility with inline encryption hardware that only supports + smaller data unit sizes. ``/sys/block/$disk/queue/crypto/`` may be + useful for checking which data unit sizes are supported by a + particular system's inline encryption hardware. + + Leave this field zeroed unless you are certain you need it. Using + an unnecessarily small data unit size reduces performance. + - For v2 encryption policies, ``__reserved`` must be zeroed. - For v1 encryption policies, ``master_key_descriptor`` specifies how diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 62e1a3dd8357..c8cf77065272 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -111,10 +111,14 @@ out: int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { - const unsigned int blockbits = inode->i_blkbits; - const unsigned int blocksize = 1 << blockbits; - const unsigned int blocks_per_page_bits = PAGE_SHIFT - blockbits; - const unsigned int blocks_per_page = 1 << blocks_per_page_bits; + const struct fscrypt_info *ci = inode->i_crypt_info; + const unsigned int du_bits = ci->ci_data_unit_bits; + const unsigned int du_size = 1U << du_bits; + const unsigned int du_per_page_bits = PAGE_SHIFT - du_bits; + const unsigned int du_per_page = 1U << du_per_page_bits; + u64 du_index = (u64)lblk << (inode->i_blkbits - du_bits); + u64 du_remaining = (u64)len << (inode->i_blkbits - du_bits); + sector_t sector = pblk << (inode->i_blkbits - SECTOR_SHIFT); struct page *pages[16]; /* write up to 16 pages at a time */ unsigned int nr_pages; unsigned int i; @@ -130,8 +134,8 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, len); 
BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_VECS); - nr_pages = min_t(unsigned int, ARRAY_SIZE(pages), - (len + blocks_per_page - 1) >> blocks_per_page_bits); + nr_pages = min_t(u64, ARRAY_SIZE(pages), + (du_remaining + du_per_page - 1) >> du_per_page_bits); /* * We need at least one page for ciphertext. Allocate the first one @@ -154,21 +158,22 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, bio = bio_alloc(inode->i_sb->s_bdev, nr_pages, REQ_OP_WRITE, GFP_NOFS); do { - bio->bi_iter.bi_sector = pblk << (blockbits - 9); + bio->bi_iter.bi_sector = sector; i = 0; offset = 0; do { - err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), pages[i], - blocksize, offset, GFP_NOFS); + err = fscrypt_crypt_data_unit(ci, FS_ENCRYPT, du_index, + ZERO_PAGE(0), pages[i], + du_size, offset, + GFP_NOFS); if (err) goto out; - lblk++; - pblk++; - len--; - offset += blocksize; - if (offset == PAGE_SIZE || len == 0) { + du_index++; + sector += 1U << (du_bits - SECTOR_SHIFT); + du_remaining--; + offset += du_size; + if (offset == PAGE_SIZE || du_remaining == 0) { ret = bio_add_page(bio, pages[i++], offset, 0); if (WARN_ON_ONCE(ret != offset)) { err = -EIO; @@ -176,13 +181,13 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, } offset = 0; } - } while (i != nr_pages && len != 0); + } while (i != nr_pages && du_remaining != 0); err = submit_bio_wait(bio); if (err) goto out; bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE); - } while (len != 0); + } while (du_remaining != 0); err = 0; out: bio_put(bio); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index aed0c5ea7578..85e2f66dd663 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -77,14 +77,14 @@ void fscrypt_free_bounce_page(struct page *bounce_page) EXPORT_SYMBOL(fscrypt_free_bounce_page); /* - * Generate the IV for the given logical block number within the given file. - * For filenames encryption, lblk_num == 0. 
+ * Generate the IV for the given data unit index within the given file. + * For filenames encryption, index == 0. * * Keep this in sync with fscrypt_limit_io_blocks(). fscrypt_limit_io_blocks() * needs to know about any IV generation methods where the low bits of IV don't - * simply contain the lblk_num (e.g., IV_INO_LBLK_32). + * simply contain the data unit index (e.g., IV_INO_LBLK_32). */ -void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, +void fscrypt_generate_iv(union fscrypt_iv *iv, u64 index, const struct fscrypt_info *ci) { u8 flags = fscrypt_policy_flags(&ci->ci_policy); @@ -92,29 +92,29 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, memset(iv, 0, ci->ci_mode->ivsize); if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { - WARN_ON_ONCE(lblk_num > U32_MAX); + WARN_ON_ONCE(index > U32_MAX); WARN_ON_ONCE(ci->ci_inode->i_ino > U32_MAX); - lblk_num |= (u64)ci->ci_inode->i_ino << 32; + index |= (u64)ci->ci_inode->i_ino << 32; } else if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) { - WARN_ON_ONCE(lblk_num > U32_MAX); - lblk_num = (u32)(ci->ci_hashed_ino + lblk_num); + WARN_ON_ONCE(index > U32_MAX); + index = (u32)(ci->ci_hashed_ino + index); } else if (flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { memcpy(iv->nonce, ci->ci_nonce, FSCRYPT_FILE_NONCE_SIZE); } - iv->lblk_num = cpu_to_le64(lblk_num); + iv->index = cpu_to_le64(index); } -/* Encrypt or decrypt a single filesystem block of file contents */ -int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, - u64 lblk_num, struct page *src_page, - struct page *dest_page, unsigned int len, - unsigned int offs, gfp_t gfp_flags) +/* Encrypt or decrypt a single "data unit" of file contents. 
*/ +int fscrypt_crypt_data_unit(const struct fscrypt_info *ci, + fscrypt_direction_t rw, u64 index, + struct page *src_page, struct page *dest_page, + unsigned int len, unsigned int offs, + gfp_t gfp_flags) { union fscrypt_iv iv; struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist dst, src; - struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_enc_key.tfm; int res = 0; @@ -123,7 +123,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, if (WARN_ON_ONCE(len % FSCRYPT_CONTENTS_ALIGNMENT != 0)) return -EINVAL; - fscrypt_generate_iv(&iv, lblk_num, ci); + fscrypt_generate_iv(&iv, index, ci); req = skcipher_request_alloc(tfm, gfp_flags); if (!req) @@ -144,28 +144,29 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); skcipher_request_free(req); if (res) { - fscrypt_err(inode, "%scryption failed for block %llu: %d", - (rw == FS_DECRYPT ? "De" : "En"), lblk_num, res); + fscrypt_err(ci->ci_inode, + "%scryption failed for data unit %llu: %d", + (rw == FS_DECRYPT ? "De" : "En"), index, res); return res; } return 0; } /** - * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a - * pagecache page - * @page: The locked pagecache page containing the block(s) to encrypt - * @len: Total size of the block(s) to encrypt. Must be a nonzero - * multiple of the filesystem's block size. - * @offs: Byte offset within @page of the first block to encrypt. Must be - * a multiple of the filesystem's block size. - * @gfp_flags: Memory allocation flags. See details below. 
+ * fscrypt_encrypt_pagecache_blocks() - Encrypt data from a pagecache page + * @page: the locked pagecache page containing the data to encrypt + * @len: size of the data to encrypt, in bytes + * @offs: offset within @page of the data to encrypt, in bytes + * @gfp_flags: memory allocation flags; see details below * - * A new bounce page is allocated, and the specified block(s) are encrypted into - * it. In the bounce page, the ciphertext block(s) will be located at the same - * offsets at which the plaintext block(s) were located in the source page; any - * other parts of the bounce page will be left uninitialized. However, normally - * blocksize == PAGE_SIZE and the whole page is encrypted at once. + * This allocates a new bounce page and encrypts the given data into it. The + * length and offset of the data must be aligned to the file's crypto data unit + * size. Alignment to the filesystem block size fulfills this requirement, as + * the filesystem block size is always a multiple of the data unit size. + * + * In the bounce page, the ciphertext data will be located at the same offset at + * which the plaintext data was located in the source page. Any other parts of + * the bounce page will be left uninitialized. * * This is for use by the filesystem's ->writepages() method. 
* @@ -183,28 +184,29 @@ struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, { const struct inode *inode = page->mapping->host; - const unsigned int blockbits = inode->i_blkbits; - const unsigned int blocksize = 1 << blockbits; + const struct fscrypt_info *ci = inode->i_crypt_info; + const unsigned int du_bits = ci->ci_data_unit_bits; + const unsigned int du_size = 1U << du_bits; struct page *ciphertext_page; - u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) + - (offs >> blockbits); + u64 index = ((u64)page->index << (PAGE_SHIFT - du_bits)) + + (offs >> du_bits); unsigned int i; int err; if (WARN_ON_ONCE(!PageLocked(page))) return ERR_PTR(-EINVAL); - if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) + if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, du_size))) return ERR_PTR(-EINVAL); ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags); if (!ciphertext_page) return ERR_PTR(-ENOMEM); - for (i = offs; i < offs + len; i += blocksize, lblk_num++) { - err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, - page, ciphertext_page, - blocksize, i, gfp_flags); + for (i = offs; i < offs + len; i += du_size, index++) { + err = fscrypt_crypt_data_unit(ci, FS_ENCRYPT, index, + page, ciphertext_page, + du_size, i, gfp_flags); if (err) { fscrypt_free_bounce_page(ciphertext_page); return ERR_PTR(err); @@ -231,30 +233,33 @@ EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks); * arbitrary page, not necessarily in the original pagecache page. The @inode * and @lblk_num must be specified, as they can't be determined from @page. * + * This is not compatible with fscrypt_operations::supports_subblock_data_units. 
+ * * Return: 0 on success; -errno on failure */ int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags) { - return fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, page, - len, offs, gfp_flags); + if (WARN_ON_ONCE(inode->i_sb->s_cop->supports_subblock_data_units)) + return -EOPNOTSUPP; + return fscrypt_crypt_data_unit(inode->i_crypt_info, FS_ENCRYPT, + lblk_num, page, page, len, offs, + gfp_flags); } EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); /** - * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a - * pagecache folio - * @folio: The locked pagecache folio containing the block(s) to decrypt - * @len: Total size of the block(s) to decrypt. Must be a nonzero - * multiple of the filesystem's block size. - * @offs: Byte offset within @folio of the first block to decrypt. Must be - * a multiple of the filesystem's block size. + * fscrypt_decrypt_pagecache_blocks() - Decrypt data from a pagecache folio + * @folio: the pagecache folio containing the data to decrypt + * @len: size of the data to decrypt, in bytes + * @offs: offset within @folio of the data to decrypt, in bytes * - * The specified block(s) are decrypted in-place within the pagecache folio, - * which must still be locked and not uptodate. - * - * This is for use by the filesystem's ->readahead() method. + * Decrypt data that has just been read from an encrypted file. The data must + * be located in a pagecache folio that is still locked and not yet uptodate. + * The length and offset of the data must be aligned to the file's crypto data + * unit size. Alignment to the filesystem block size fulfills this requirement, + * as the filesystem block size is always a multiple of the data unit size. 
* * Return: 0 on success; -errno on failure */ @@ -262,25 +267,26 @@ int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs) { const struct inode *inode = folio->mapping->host; - const unsigned int blockbits = inode->i_blkbits; - const unsigned int blocksize = 1 << blockbits; - u64 lblk_num = ((u64)folio->index << (PAGE_SHIFT - blockbits)) + - (offs >> blockbits); + const struct fscrypt_info *ci = inode->i_crypt_info; + const unsigned int du_bits = ci->ci_data_unit_bits; + const unsigned int du_size = 1U << du_bits; + u64 index = ((u64)folio->index << (PAGE_SHIFT - du_bits)) + + (offs >> du_bits); size_t i; int err; if (WARN_ON_ONCE(!folio_test_locked(folio))) return -EINVAL; - if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) + if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, du_size))) return -EINVAL; - for (i = offs; i < offs + len; i += blocksize, lblk_num++) { + for (i = offs; i < offs + len; i += du_size, index++) { struct page *page = folio_page(folio, i >> PAGE_SHIFT); - err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, - page, blocksize, i & ~PAGE_MASK, - GFP_NOFS); + err = fscrypt_crypt_data_unit(ci, FS_DECRYPT, index, page, + page, du_size, i & ~PAGE_MASK, + GFP_NOFS); if (err) return err; } @@ -302,14 +308,19 @@ EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks); * arbitrary page, not necessarily in the original pagecache page. The @inode * and @lblk_num must be specified, as they can't be determined from @page. * + * This is not compatible with fscrypt_operations::supports_subblock_data_units. 
+ * * Return: 0 on success; -errno on failure */ int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { - return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page, - len, offs, GFP_NOFS); + if (WARN_ON_ONCE(inode->i_sb->s_cop->supports_subblock_data_units)) + return -EOPNOTSUPP; + return fscrypt_crypt_data_unit(inode->i_crypt_info, FS_DECRYPT, + lblk_num, page, page, len, offs, + GFP_NOFS); } EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 4b113214b53a..9c5e83baa3f1 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -47,7 +47,8 @@ struct fscrypt_context_v2 { u8 contents_encryption_mode; u8 filenames_encryption_mode; u8 flags; - u8 __reserved[4]; + u8 log2_data_unit_size; + u8 __reserved[3]; u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; }; @@ -165,6 +166,26 @@ fscrypt_policy_flags(const union fscrypt_policy *policy) BUG(); } +static inline int +fscrypt_policy_v2_du_bits(const struct fscrypt_policy_v2 *policy, + const struct inode *inode) +{ + return policy->log2_data_unit_size ?: inode->i_blkbits; +} + +static inline int +fscrypt_policy_du_bits(const union fscrypt_policy *policy, + const struct inode *inode) +{ + switch (policy->version) { + case FSCRYPT_POLICY_V1: + return inode->i_blkbits; + case FSCRYPT_POLICY_V2: + return fscrypt_policy_v2_du_bits(&policy->v2, inode); + } + BUG(); +} + /* * For encrypted symlinks, the ciphertext length is stored at the beginning * of the string in little-endian format. @@ -211,6 +232,16 @@ struct fscrypt_info { bool ci_inlinecrypt; #endif + /* + * log2 of the data unit size (granularity of contents encryption) of + * this file. This is computable from ci_policy and ci_inode but is + * cached here for efficiency. Only used for regular files. 
+ */ + u8 ci_data_unit_bits; + + /* Cached value: log2 of number of data units per FS block */ + u8 ci_data_units_per_block_bits; + /* * Encryption mode used for this inode. It corresponds to either the * contents or filenames encryption mode, depending on the inode type. @@ -265,10 +296,11 @@ typedef enum { /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; int fscrypt_initialize(struct super_block *sb); -int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, - u64 lblk_num, struct page *src_page, - struct page *dest_page, unsigned int len, - unsigned int offs, gfp_t gfp_flags); +int fscrypt_crypt_data_unit(const struct fscrypt_info *ci, + fscrypt_direction_t rw, u64 index, + struct page *src_page, struct page *dest_page, + unsigned int len, unsigned int offs, + gfp_t gfp_flags); struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); void __printf(3, 4) __cold @@ -283,8 +315,8 @@ fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...); union fscrypt_iv { struct { - /* logical block number within the file */ - __le64 lblk_num; + /* zero-based index of data unit within the file */ + __le64 index; /* per-file nonce; only set in DIRECT_KEY mode */ u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; @@ -293,17 +325,17 @@ union fscrypt_iv { __le64 dun[FSCRYPT_MAX_IV_SIZE / sizeof(__le64)]; }; -void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, +void fscrypt_generate_iv(union fscrypt_iv *iv, u64 index, const struct fscrypt_info *ci); /* - * Return the number of bits used by the maximum file logical block number that - * is possible on the given filesystem. + * Return the number of bits used by the maximum file data unit index that is + * possible on the given filesystem, using the given log2 data unit size. 
*/ static inline int -fscrypt_max_file_lblk_bits(const struct super_block *sb) +fscrypt_max_file_dun_bits(const struct super_block *sb, int du_bits) { - return fls64(sb->s_maxbytes - 1) - sb->s_blocksize_bits; + return fls64(sb->s_maxbytes - 1) - du_bits; } /* fname.c */ diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 7d9f6c167de5..8c6d37d6225a 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -43,6 +43,7 @@ static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci) { const struct super_block *sb = ci->ci_inode->i_sb; unsigned int flags = fscrypt_policy_flags(&ci->ci_policy); + int dun_bits; if (flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) return offsetofend(union fscrypt_iv, nonce); @@ -53,8 +54,9 @@ static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci) if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) return sizeof(__le32); - /* Default case: IVs are just the file logical block number */ - return DIV_ROUND_UP(fscrypt_max_file_lblk_bits(sb), 8); + /* Default case: IVs are just the file data unit index */ + dun_bits = fscrypt_max_file_dun_bits(sb, ci->ci_data_unit_bits); + return DIV_ROUND_UP(dun_bits, 8); } /* @@ -126,7 +128,7 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci) * crypto configuration that the file would use. 
*/ crypto_cfg.crypto_mode = ci->ci_mode->blk_crypto_mode; - crypto_cfg.data_unit_size = sb->s_blocksize; + crypto_cfg.data_unit_size = 1U << ci->ci_data_unit_bits; crypto_cfg.dun_bytes = fscrypt_get_dun_bytes(ci); devs = fscrypt_get_devices(sb, &num_devs); @@ -165,7 +167,8 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, return -ENOMEM; err = blk_crypto_init_key(blk_key, raw_key, crypto_mode, - fscrypt_get_dun_bytes(ci), sb->s_blocksize); + fscrypt_get_dun_bytes(ci), + 1U << ci->ci_data_unit_bits); if (err) { fscrypt_err(inode, "error %d initializing blk-crypto key", err); goto fail; @@ -232,10 +235,11 @@ EXPORT_SYMBOL_GPL(__fscrypt_inode_uses_inline_crypto); static void fscrypt_generate_dun(const struct fscrypt_info *ci, u64 lblk_num, u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]) { + u64 index = lblk_num << ci->ci_data_units_per_block_bits; union fscrypt_iv iv; int i; - fscrypt_generate_iv(&iv, lblk_num, ci); + fscrypt_generate_iv(&iv, index, ci); BUILD_BUG_ON(FSCRYPT_MAX_IV_SIZE > BLK_CRYPTO_MAX_IV_SIZE); memset(dun, 0, BLK_CRYPTO_MAX_IV_SIZE); diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 361f41ef46c7..608599f8aa57 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -580,6 +580,11 @@ fscrypt_setup_encryption_info(struct inode *inode, WARN_ON_ONCE(mode->ivsize > FSCRYPT_MAX_IV_SIZE); crypt_info->ci_mode = mode; + crypt_info->ci_data_unit_bits = + fscrypt_policy_du_bits(&crypt_info->ci_policy, inode); + crypt_info->ci_data_units_per_block_bits = + inode->i_blkbits - crypt_info->ci_data_unit_bits; + res = setup_file_encryption_key(crypt_info, need_dirhash_key, &mk); if (res) goto out; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 32709dad9762..2fb3f6a1258e 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -165,10 +165,11 @@ static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy, } /* - * IV_INO_LBLK_64 and IV_INO_LBLK_32 both require that file logical - * block numbers 
fit in 32 bits. + * IV_INO_LBLK_64 and IV_INO_LBLK_32 both require that file data unit + * indices fit in 32 bits. */ - if (fscrypt_max_file_lblk_bits(sb) > 32) { + if (fscrypt_max_file_dun_bits(sb, + fscrypt_policy_v2_du_bits(policy, inode)) > 32) { fscrypt_warn(inode, "Can't use %s policy on filesystem '%s' because its maximum file size is too large", type, sb->s_id); @@ -243,6 +244,31 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, return false; } + if (policy->log2_data_unit_size) { + if (!inode->i_sb->s_cop->supports_subblock_data_units) { + fscrypt_warn(inode, + "Filesystem does not support configuring crypto data unit size"); + return false; + } + if (policy->log2_data_unit_size > inode->i_blkbits || + policy->log2_data_unit_size < SECTOR_SHIFT /* 9 */) { + fscrypt_warn(inode, + "Unsupported log2_data_unit_size in encryption policy: %d", + policy->log2_data_unit_size); + return false; + } + if (policy->log2_data_unit_size != inode->i_blkbits && + (policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) { + /* + * Not safe to enable yet, as we need to ensure that DUN + * wraparound can only occur on a FS block boundary. 
+ */ + fscrypt_warn(inode, + "Sub-block data units not yet supported with IV_INO_LBLK_32"); + return false; + } + } + if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && !supported_direct_key_modes(inode, policy->contents_encryption_mode, policy->filenames_encryption_mode)) @@ -329,6 +355,7 @@ static int fscrypt_new_context(union fscrypt_context *ctx_u, ctx->filenames_encryption_mode = policy->filenames_encryption_mode; ctx->flags = policy->flags; + ctx->log2_data_unit_size = policy->log2_data_unit_size; memcpy(ctx->master_key_identifier, policy->master_key_identifier, sizeof(ctx->master_key_identifier)); @@ -389,6 +416,7 @@ int fscrypt_policy_from_context(union fscrypt_policy *policy_u, policy->filenames_encryption_mode = ctx->filenames_encryption_mode; policy->flags = ctx->flags; + policy->log2_data_unit_size = ctx->log2_data_unit_size; memcpy(policy->__reserved, ctx->__reserved, sizeof(policy->__reserved)); memcpy(policy->master_key_identifier, diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 9e36731701ba..7ae0b61258a7 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -235,6 +235,7 @@ static bool ext4_has_stable_inodes(struct super_block *sb) const struct fscrypt_operations ext4_cryptops = { .needs_bounce_pages = 1, .has_32bit_inodes = 1, + .supports_subblock_data_units = 1, .legacy_key_prefix = "ext4:", .get_context = ext4_get_context, .set_context = ext4_set_context, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c44915713264..66a5bf4216b7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3226,6 +3226,7 @@ static struct block_device **f2fs_get_devices(struct super_block *sb, static const struct fscrypt_operations f2fs_cryptops = { .needs_bounce_pages = 1, .has_32bit_inodes = 1, + .supports_subblock_data_units = 1, .legacy_key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 09a3cacbf62a..b559e6f77707 100644 --- 
a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -85,6 +85,18 @@ struct fscrypt_operations { */ unsigned int has_32bit_inodes : 1; + /* + * If set, then fs/crypto/ will allow users to select a crypto data unit + * size that is less than the filesystem block size. This is done via + * the log2_data_unit_size field of the fscrypt policy. This flag is + * not compatible with filesystems that encrypt variable-length blocks + * (i.e. blocks that aren't all equal to filesystem's block size), for + * example as a result of compression. It's also not compatible with + * the fscrypt_encrypt_block_inplace() and + * fscrypt_decrypt_block_inplace() functions. + */ + unsigned int supports_subblock_data_units : 1; + /* * This field exists only for backwards compatibility reasons and should * only be set by the filesystems that are setting it already. It diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index fd1fb0d5389d..7a8f4c290187 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -71,7 +71,8 @@ struct fscrypt_policy_v2 { __u8 contents_encryption_mode; __u8 filenames_encryption_mode; __u8 flags; - __u8 __reserved[4]; + __u8 log2_data_unit_size; + __u8 __reserved[3]; __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; }; -- cgit v1.2.3 From e850d9a52f4cd31521c80a7ea9718b69129af4d5 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sat, 12 Aug 2023 01:05:07 +0800 Subject: badblocks: add more helper structure and routines in badblocks.h This patch adds the following helper structure and routines into badblocks.h, - struct badblocks_context This structure is used in improved badblocks code for bad table iteration. - BB_END() The macro to calculate end LBA of a bad range record from bad table. - badblocks_full() and badblocks_empty() The inline routines to check whether bad table is full or empty. - set_changed() and clear_changed() The inline routines to set and clear 'changed' tag from struct badblocks. 
These new helper structure and routines can help to make the code more clear, they will be used in the improved badblocks code in following patches. Signed-off-by: Coly Li Reviewed-by: Xiao Ni Cc: Dan Williams Cc: Geliang Tang Cc: Hannes Reinecke Cc: Jens Axboe Cc: NeilBrown Cc: Vishal L Verma Acked-by: Geliang Tang Link: https://lore.kernel.org/r/20230811170513.2300-2-colyli@suse.de Signed-off-by: Jens Axboe --- include/linux/badblocks.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h index 2426276b9bd3..670f2dae692f 100644 --- a/include/linux/badblocks.h +++ b/include/linux/badblocks.h @@ -15,6 +15,7 @@ #define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9) #define BB_LEN(x) (((x) & BB_LEN_MASK) + 1) #define BB_ACK(x) (!!((x) & BB_ACK_MASK)) +#define BB_END(x) (BB_OFFSET(x) + BB_LEN(x)) #define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63)) /* Bad block numbers are stored sorted in a single page. 
@@ -41,6 +42,12 @@ struct badblocks { sector_t size; /* in sectors */ }; +struct badblocks_context { + sector_t start; + sector_t len; + int ack; +}; + int badblocks_check(struct badblocks *bb, sector_t s, int sectors, sector_t *first_bad, int *bad_sectors); int badblocks_set(struct badblocks *bb, sector_t s, int sectors, @@ -63,4 +70,27 @@ static inline void devm_exit_badblocks(struct device *dev, struct badblocks *bb) } badblocks_exit(bb); } + +static inline int badblocks_full(struct badblocks *bb) +{ + return (bb->count >= MAX_BADBLOCKS); +} + +static inline int badblocks_empty(struct badblocks *bb) +{ + return (bb->count == 0); +} + +static inline void set_changed(struct badblocks *bb) +{ + if (bb->changed != 1) + bb->changed = 1; +} + +static inline void clear_changed(struct badblocks *bb) +{ + if (bb->changed != 0) + bb->changed = 0; +} + #endif -- cgit v1.2.3 From 948f0bf5ad6ac1e5f19f6aa8e7da4a950d66b661 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 20 Sep 2023 13:07:42 +0300 Subject: IB/mlx5: Add support for 800G_8X lane speed Add a check for 800G_8X speed when querying PTYS and report it back correctly when needed. 
Signed-off-by: Patrisious Haddad Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/26fd0b6e1fac071c3eb779657bb3d8ba47f47c4f.1695204156.git.leon@kernel.org Reviewed-by: Jacob Keller Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/port.c | 1 + include/linux/mlx5/port.h | 1 + 3 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aed5cdea50e6..457d5ac7ad04 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -451,6 +451,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, *active_width = IB_WIDTH_4X; *active_speed = IB_SPEED_NDR; break; + case MLX5E_PROT_MASK(MLX5E_800GAUI_8_800GBASE_CR8_KR8): + *active_width = IB_WIDTH_8X; + *active_speed = IB_SPEED_NDR; + break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index be70d1f23a5d..43423543f34c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -1102,6 +1102,7 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, + [MLX5E_800GAUI_8_800GBASE_CR8_KR8] = 800000, }; int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 98b2e1e149f9..794001ebd003 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -117,6 +117,7 @@ enum mlx5e_ext_link_mode { MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, MLX5E_400GAUI_8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, MLX5E_EXT_LINK_MODES_NUMBER, }; -- cgit v1.2.3 From b28ad32442bec2f0d9cb660d7d698a1a53c13d08 
Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 20 Sep 2023 13:07:43 +0300 Subject: IB/mlx5: Rename 400G_8X speed to comply to naming convention Rename 400G_8X speed to comply to naming convention. Signed-off-by: Patrisious Haddad Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/ac98447cac8379a43fbdb36d56e5fb2b741a97ff.1695204156.git.leon@kernel.org Reviewed-by: Jacob Keller Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/port.c | 2 +- include/linux/mlx5/port.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 457d5ac7ad04..428abc916509 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -443,7 +443,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_NDR; break; - case MLX5E_PROT_MASK(MLX5E_400GAUI_8): + case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8): *active_width = IB_WIDTH_8X; *active_speed = IB_SPEED_HDR; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 43423543f34c..7d8c732818f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -1098,7 +1098,7 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, - [MLX5E_400GAUI_8] = 400000, + [MLX5E_400GAUI_8_400GBASE_CR8] = 400000, [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 794001ebd003..26092c78a985 100644 --- a/include/linux/mlx5/port.h +++ 
b/include/linux/mlx5/port.h @@ -115,7 +115,7 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, - MLX5E_400GAUI_8 = 15, + MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, MLX5E_EXT_LINK_MODES_NUMBER, -- cgit v1.2.3 From 9cf63f3a33e929f7eca36409914b8c12102b9984 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:54:37 -0700 Subject: platform/surface: aggregator: Annotate struct ssam_event with __counted_by MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ssam_event. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Maximilian Luz Cc: platform-driver-x86@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: "Gustavo A. R. 
Silva" Reviewed-by: Maximilian Luz Link: https://lore.kernel.org/r/20230922175436.work.031-kees@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/surface_aggregator/controller.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h index cb7980805920..5b67f0f47d80 100644 --- a/include/linux/surface_aggregator/controller.h +++ b/include/linux/surface_aggregator/controller.h @@ -44,7 +44,7 @@ struct ssam_event { u8 command_id; u8 instance_id; u16 length; - u8 data[]; + u8 data[] __counted_by(length); }; /** -- cgit v1.2.3 From 8d74f1da776da9b0306630b13a3025214fa44618 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Fri, 22 Sep 2023 11:15:52 +0530 Subject: PM: sleep: Fix symbol export for _SIMPLE_ variants of _PM_OPS() Currently EXPORT_*_SIMPLE_DEV_PM_OPS() use EXPORT_*_DEV_PM_OPS() set of macros to export dev_pm_ops symbol, which export the symbol in case CONFIG_PM=y but don't take CONFIG_PM_SLEEP into consideration. Since _SIMPLE_ variants of _PM_OPS() do not include runtime PM handles and are only used in case CONFIG_PM_SLEEP=y, we should not be exporting dev_pm_ops symbol for them in case CONFIG_PM_SLEEP=n. This can be fixed by having two distinct set of export macros for both _RUNTIME_ and _SIMPLE_ variants of _PM_OPS(), such that the export of dev_pm_ops symbol used in each variant depends on CONFIG_PM and CONFIG_PM_SLEEP respectively. Introduce _DEV_SLEEP_PM_OPS() set of export macros for _SIMPLE_ variants of _PM_OPS(), which export dev_pm_ops symbol only in case CONFIG_PM_SLEEP=y and discard it otherwise. Fixes: 34e1ed189fab ("PM: Improve EXPORT_*_DEV_PM_OPS macros") Signed-off-by: Raag Jadav Reviewed-by: Paul Cercueil Signed-off-by: Rafael J. 
Wysocki --- include/linux/pm.h | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 1400c37b29c7..629c1633bbd0 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -374,24 +374,39 @@ const struct dev_pm_ops name = { \ RUNTIME_PM_OPS(runtime_suspend_fn, runtime_resume_fn, idle_fn) \ } -#ifdef CONFIG_PM -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ +#define _EXPORT_PM_OPS(name, license, ns) \ const struct dev_pm_ops name; \ __EXPORT_SYMBOL(name, license, ns); \ const struct dev_pm_ops name -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) -#else -#define _EXPORT_DEV_PM_OPS(name, license, ns) \ + +#define _DISCARD_PM_OPS(name, license, ns) \ static __maybe_unused const struct dev_pm_ops __static_##name + +#ifdef CONFIG_PM +#define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) +#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) +#else +#define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) #define EXPORT_PM_FN_GPL(name) #define EXPORT_PM_FN_NS_GPL(name, ns) #endif -#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") -#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") -#define EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) -#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) +#ifdef CONFIG_PM_SLEEP +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) +#else +#define _EXPORT_DEV_SLEEP_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) +#endif + +#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "GPL", "") +#define 
EXPORT_NS_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_PM_OPS(name, ns) _EXPORT_DEV_PM_OPS(name, "GPL", #ns) + +#define EXPORT_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "", "") +#define EXPORT_GPL_DEV_SLEEP_PM_OPS(name) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", "") +#define EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "", #ns) +#define EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) _EXPORT_DEV_SLEEP_PM_OPS(name, "GPL", #ns) /* * Use this if you want to use the same suspend and resume callbacks for suspend @@ -404,19 +419,19 @@ const struct dev_pm_ops name = { \ _DEFINE_DEV_PM_OPS(name, suspend_fn, resume_fn, NULL, NULL, NULL) #define EXPORT_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_DEV_PM_OPS(name) = { \ + EXPORT_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ - EXPORT_GPL_DEV_PM_OPS(name) = { \ + EXPORT_GPL_DEV_SLEEP_PM_OPS(name) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } #define EXPORT_NS_GPL_SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn, ns) \ - EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ + EXPORT_NS_GPL_DEV_SLEEP_PM_OPS(name, ns) = { \ SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ } -- cgit v1.2.3 From 4d0e179a42879f7d76a5b95a2e7e7a5afa33954a Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:55 +1000 Subject: media: cros-ec-cec: Manage an array of ports To support multiple CEC ports, change cros_ec_cec to contain an array of ports, each with their own CEC adapter, etc. For now, only create a single port and use that port everywhere, so there is no functional change. Support for multiple ports will be added in the following patches. 
Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- drivers/media/cec/platform/cros-ec/cros-ec-cec.c | 147 +++++++++++++++++------ include/linux/platform_data/cros_ec_commands.h | 2 + 2 files changed, 110 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c index 8dd95fb38546..d76a25ae0cf1 100644 --- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c +++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c @@ -21,21 +21,40 @@ #define DRV_NAME "cros-ec-cec" +/* Only one port is supported for now */ +#define CEC_NUM_PORTS 1 +#define CEC_PORT 0 + /** - * struct cros_ec_cec - Driver data for EC CEC + * struct cros_ec_cec_port - Driver data for a single EC CEC port * - * @cros_ec: Pointer to EC device - * @notifier: Notifier info for responding to EC events + * @port_num: port number * @adap: CEC adapter * @notify: CEC notifier pointer * @rx_msg: storage for a received message + * @cros_ec_cec: pointer to the parent struct */ -struct cros_ec_cec { - struct cros_ec_device *cros_ec; - struct notifier_block notifier; +struct cros_ec_cec_port { + int port_num; struct cec_adapter *adap; struct cec_notifier *notify; struct cec_msg rx_msg; + struct cros_ec_cec *cros_ec_cec; +}; + +/** + * struct cros_ec_cec - Driver data for EC CEC + * + * @cros_ec: Pointer to EC device + * @notifier: Notifier info for responding to EC events + * @num_ports: Number of CEC ports + * @ports: Array of ports + */ +struct cros_ec_cec { + struct cros_ec_device *cros_ec; + struct notifier_block notifier; + int num_ports; + struct cros_ec_cec_port *ports[EC_CEC_MAX_PORTS]; }; static void handle_cec_message(struct cros_ec_cec *cros_ec_cec) @@ -43,27 +62,28 @@ static void handle_cec_message(struct cros_ec_cec *cros_ec_cec) struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; uint8_t *cec_message = cros_ec->event_data.data.cec_message; unsigned int len = cros_ec->event_size; + 
struct cros_ec_cec_port *port = cros_ec_cec->ports[CEC_PORT]; if (len > CEC_MAX_MSG_SIZE) len = CEC_MAX_MSG_SIZE; - cros_ec_cec->rx_msg.len = len; - memcpy(cros_ec_cec->rx_msg.msg, cec_message, len); + port->rx_msg.len = len; + memcpy(port->rx_msg.msg, cec_message, len); - cec_received_msg(cros_ec_cec->adap, &cros_ec_cec->rx_msg); + cec_received_msg(port->adap, &port->rx_msg); } static void handle_cec_event(struct cros_ec_cec *cros_ec_cec) { struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; uint32_t events = cros_ec->event_data.data.cec_events; + struct cros_ec_cec_port *port = cros_ec_cec->ports[CEC_PORT]; if (events & EC_MKBP_CEC_SEND_OK) - cec_transmit_attempt_done(cros_ec_cec->adap, - CEC_TX_STATUS_OK); + cec_transmit_attempt_done(port->adap, CEC_TX_STATUS_OK); /* FW takes care of all retries, tell core to avoid more retries */ if (events & EC_MKBP_CEC_SEND_FAILED) - cec_transmit_attempt_done(cros_ec_cec->adap, + cec_transmit_attempt_done(port->adap, CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_NACK); } @@ -93,7 +113,8 @@ static int cros_ec_cec_event(struct notifier_block *nb, static int cros_ec_cec_set_log_addr(struct cec_adapter *adap, u8 logical_addr) { - struct cros_ec_cec *cros_ec_cec = adap->priv; + struct cros_ec_cec_port *port = adap->priv; + struct cros_ec_cec *cros_ec_cec = port->cros_ec_cec; struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; struct ec_params_cec_set params = { .cmd = CEC_CMD_LOGICAL_ADDRESS, @@ -115,7 +136,8 @@ static int cros_ec_cec_set_log_addr(struct cec_adapter *adap, u8 logical_addr) static int cros_ec_cec_transmit(struct cec_adapter *adap, u8 attempts, u32 signal_free_time, struct cec_msg *cec_msg) { - struct cros_ec_cec *cros_ec_cec = adap->priv; + struct cros_ec_cec_port *port = adap->priv; + struct cros_ec_cec *cros_ec_cec = port->cros_ec_cec; struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; struct ec_params_cec_write params; int ret; @@ -135,7 +157,8 @@ static int cros_ec_cec_transmit(struct cec_adapter *adap, 
u8 attempts, static int cros_ec_cec_adap_enable(struct cec_adapter *adap, bool enable) { - struct cros_ec_cec *cros_ec_cec = adap->priv; + struct cros_ec_cec_port *port = adap->priv; + struct cros_ec_cec *cros_ec_cec = port->cros_ec_cec; struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; struct ec_params_cec_set params = { .cmd = CEC_CMD_ENABLE, @@ -260,11 +283,55 @@ static struct device *cros_ec_cec_find_hdmi_dev(struct device *dev, #endif +static int cros_ec_cec_init_port(struct device *dev, + struct cros_ec_cec *cros_ec_cec, + int port_num, struct device *hdmi_dev, + const char *conn) +{ + struct cros_ec_cec_port *port; + int ret; + + port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + port->cros_ec_cec = cros_ec_cec; + port->port_num = port_num; + + port->adap = cec_allocate_adapter(&cros_ec_cec_ops, port, DRV_NAME, + CEC_CAP_DEFAULTS | + CEC_CAP_CONNECTOR_INFO, 1); + if (IS_ERR(port->adap)) + return PTR_ERR(port->adap); + + port->notify = cec_notifier_cec_adap_register(hdmi_dev, conn, + port->adap); + if (!port->notify) { + ret = -ENOMEM; + goto out_probe_adapter; + } + + ret = cec_register_adapter(port->adap, dev); + if (ret < 0) + goto out_probe_notify; + + cros_ec_cec->ports[port_num] = port; + + return 0; + +out_probe_notify: + cec_notifier_cec_adap_unregister(port->notify, port->adap); +out_probe_adapter: + cec_delete_adapter(port->adap); + return ret; +} + static int cros_ec_cec_probe(struct platform_device *pdev) { struct cros_ec_dev *ec_dev = dev_get_drvdata(pdev->dev.parent); struct cros_ec_device *cros_ec = ec_dev->ec_dev; struct cros_ec_cec *cros_ec_cec; + struct cros_ec_cec_port *port; struct device *hdmi_dev; const char *conn = NULL; int ret; @@ -283,18 +350,13 @@ static int cros_ec_cec_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - cros_ec_cec->adap = cec_allocate_adapter(&cros_ec_cec_ops, cros_ec_cec, - DRV_NAME, - CEC_CAP_DEFAULTS | - CEC_CAP_CONNECTOR_INFO, 1); - if 
(IS_ERR(cros_ec_cec->adap)) - return PTR_ERR(cros_ec_cec->adap); + cros_ec_cec->num_ports = CEC_NUM_PORTS; - cros_ec_cec->notify = cec_notifier_cec_adap_register(hdmi_dev, conn, - cros_ec_cec->adap); - if (!cros_ec_cec->notify) { - ret = -ENOMEM; - goto out_probe_adapter; + for (int i = 0; i < cros_ec_cec->num_ports; i++) { + ret = cros_ec_cec_init_port(&pdev->dev, cros_ec_cec, i, + hdmi_dev, conn); + if (ret) + goto unregister_ports; } /* Get CEC events from the EC. */ @@ -303,20 +365,24 @@ static int cros_ec_cec_probe(struct platform_device *pdev) &cros_ec_cec->notifier); if (ret) { dev_err(&pdev->dev, "failed to register notifier\n"); - goto out_probe_notify; + goto unregister_ports; } - ret = cec_register_adapter(cros_ec_cec->adap, &pdev->dev); - if (ret < 0) - goto out_probe_notify; - return 0; -out_probe_notify: - cec_notifier_cec_adap_unregister(cros_ec_cec->notify, - cros_ec_cec->adap); -out_probe_adapter: - cec_delete_adapter(cros_ec_cec->adap); +unregister_ports: + /* + * Unregister any adapters which have been registered. We don't add the + * port to the array until the adapter has been registered successfully, + * so any non-NULL ports must have been registered. 
+ */ + for (int i = 0; i < cros_ec_cec->num_ports; i++) { + port = cros_ec_cec->ports[i]; + if (!port) + break; + cec_notifier_cec_adap_unregister(port->notify, port->adap); + cec_unregister_adapter(port->adap); + } return ret; } @@ -324,6 +390,7 @@ static void cros_ec_cec_remove(struct platform_device *pdev) { struct cros_ec_cec *cros_ec_cec = platform_get_drvdata(pdev); struct device *dev = &pdev->dev; + struct cros_ec_cec_port *port; int ret; /* @@ -337,9 +404,11 @@ static void cros_ec_cec_remove(struct platform_device *pdev) if (ret) dev_err(dev, "failed to unregister notifier\n"); - cec_notifier_cec_adap_unregister(cros_ec_cec->notify, - cros_ec_cec->adap); - cec_unregister_adapter(cros_ec_cec->adap); + for (int i = 0; i < cros_ec_cec->num_ports; i++) { + port = cros_ec_cec->ports[i]; + cec_notifier_cec_adap_unregister(port->notify, port->adap); + cec_unregister_adapter(port->adap); + } } static struct platform_driver cros_ec_cec_driver = { diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ab721cf13a98..cb2ddd10a613 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4436,6 +4436,8 @@ struct ec_response_i2c_passthru_protect { * These commands are for sending and receiving message via HDMI CEC */ +#define EC_CEC_MAX_PORTS 16 + #define MAX_CEC_MSG_LEN 16 /* CEC message from the AP to be written on the CEC bus */ -- cgit v1.2.3 From e90bd1fe7cda1aa267fe683e392b4433ec2dc0d3 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:56 +1000 Subject: media: cros-ec-cec: Support multiple ports in set/get host commands Reuse the top four bits of the cmd field to specify the port number. The reason for doing this as opposed to adding a separate uint8_t field is it avoids the need to add new versions of these commands. 
The change is backwards compatible since these bits were previously always zero, so the default behaviour is to always operate on port 0. Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- drivers/media/cec/platform/cros-ec/cros-ec-cec.c | 2 ++ include/linux/platform_data/cros_ec_commands.h | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c index d76a25ae0cf1..e969031e1e0e 100644 --- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c +++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c @@ -118,6 +118,7 @@ static int cros_ec_cec_set_log_addr(struct cec_adapter *adap, u8 logical_addr) struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; struct ec_params_cec_set params = { .cmd = CEC_CMD_LOGICAL_ADDRESS, + .port = port->port_num, .val = logical_addr, }; int ret; @@ -162,6 +163,7 @@ static int cros_ec_cec_adap_enable(struct cec_adapter *adap, bool enable) struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; struct ec_params_cec_set params = { .cmd = CEC_CMD_ENABLE, + .port = port->port_num, .val = enable, }; int ret; diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index cb2ddd10a613..e8bb05db360f 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4457,13 +4457,15 @@ struct ec_params_cec_write { /** * struct ec_params_cec_set - CEC parameters set * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to set the parameter on * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC * or 1 to enable CEC functionality, in case cmd is * CEC_CMD_LOGICAL_ADDRESS, this field encodes the requested logical * address between 0 and 15 or 0xff to unregister */ struct ec_params_cec_set { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 
4; /* enum cec_command */ + uint8_t port : 4; uint8_t val; } __ec_align1; @@ -4473,9 +4475,11 @@ struct ec_params_cec_set { /** * struct ec_params_cec_get - CEC parameters get * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS + * @port: CEC port to get the parameter on */ struct ec_params_cec_get { - uint8_t cmd; /* enum cec_command */ + uint8_t cmd : 4; /* enum cec_command */ + uint8_t port : 4; } __ec_align1; /** -- cgit v1.2.3 From adbfc747ddfb48c06d238640e16939916b7a4494 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:57 +1000 Subject: media: cros-ec-cec: Support multiple ports in write command Add a v1 of the CEC write command which contains a port parameter. Check which versions of the write command the EC supports and use the highest supported version. If it only supports v0, check that there is only one port. With v0, the EC will assume all write commands are for port 0. Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- drivers/media/cec/platform/cros-ec/cros-ec-cec.c | 54 ++++++++++++++++++++++-- include/linux/platform_data/cros_ec_commands.h | 12 ++++++ 2 files changed, 63 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c index e969031e1e0e..d674a432dfdd 100644 --- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c +++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c @@ -47,12 +47,14 @@ struct cros_ec_cec_port { * * @cros_ec: Pointer to EC device * @notifier: Notifier info for responding to EC events + * @write_cmd_version: Highest supported version of EC_CMD_CEC_WRITE_MSG. 
* @num_ports: Number of CEC ports * @ports: Array of ports */ struct cros_ec_cec { struct cros_ec_device *cros_ec; struct notifier_block notifier; + int write_cmd_version; int num_ports; struct cros_ec_cec_port *ports[EC_CEC_MAX_PORTS]; }; @@ -141,12 +143,22 @@ static int cros_ec_cec_transmit(struct cec_adapter *adap, u8 attempts, struct cros_ec_cec *cros_ec_cec = port->cros_ec_cec; struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; struct ec_params_cec_write params; + struct ec_params_cec_write_v1 params_v1; int ret; - memcpy(params.msg, cec_msg->msg, cec_msg->len); + if (cros_ec_cec->write_cmd_version == 0) { + memcpy(params.msg, cec_msg->msg, cec_msg->len); + ret = cros_ec_cmd(cros_ec, 0, EC_CMD_CEC_WRITE_MSG, &params, + cec_msg->len, NULL, 0); + } else { + params_v1.port = port->port_num; + params_v1.msg_len = cec_msg->len; + memcpy(params_v1.msg, cec_msg->msg, cec_msg->len); + ret = cros_ec_cmd(cros_ec, cros_ec_cec->write_cmd_version, + EC_CMD_CEC_WRITE_MSG, &params_v1, + sizeof(params_v1), NULL, 0); + } - ret = cros_ec_cmd(cros_ec, 0, EC_CMD_CEC_WRITE_MSG, &params, - cec_msg->len, NULL, 0); if (ret < 0) { dev_err(cros_ec->dev, "error writing CEC msg on EC: %d\n", ret); @@ -285,6 +297,38 @@ static struct device *cros_ec_cec_find_hdmi_dev(struct device *dev, #endif +static int cros_ec_cec_get_write_cmd_version(struct cros_ec_cec *cros_ec_cec) +{ + struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; + struct ec_params_get_cmd_versions_v1 params = { + .cmd = EC_CMD_CEC_WRITE_MSG, + }; + struct ec_response_get_cmd_versions response; + int ret; + + ret = cros_ec_cmd(cros_ec, 1, EC_CMD_GET_CMD_VERSIONS, &params, + sizeof(params), &response, sizeof(response)); + if (ret < 0) { + dev_err(cros_ec->dev, + "error getting CEC write command version: %d\n", ret); + return ret; + } + + if (response.version_mask & EC_VER_MASK(1)) { + cros_ec_cec->write_cmd_version = 1; + } else { + if (cros_ec_cec->num_ports != 1) { + dev_err(cros_ec->dev, + "v0 write command only supports 1 port, %d
reported\n", + cros_ec_cec->num_ports); + return -EINVAL; + } + cros_ec_cec->write_cmd_version = 0; + } + + return 0; +} + static int cros_ec_cec_init_port(struct device *dev, struct cros_ec_cec *cros_ec_cec, int port_num, struct device *hdmi_dev, @@ -354,6 +398,10 @@ static int cros_ec_cec_probe(struct platform_device *pdev) cros_ec_cec->num_ports = CEC_NUM_PORTS; + ret = cros_ec_cec_get_write_cmd_version(cros_ec_cec); + if (ret) + return ret; + for (int i = 0; i < cros_ec_cec->num_ports; i++) { ret = cros_ec_cec_init_port(&pdev->dev, cros_ec_cec, i, hdmi_dev, conn); diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index e8bb05db360f..9a0c6e28f370 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4451,6 +4451,18 @@ struct ec_params_cec_write { uint8_t msg[MAX_CEC_MSG_LEN]; } __ec_align1; +/** + * struct ec_params_cec_write_v1 - Message to write to the CEC bus + * @port: CEC port to write the message on + * @msg_len: length of msg in bytes + * @msg: message content to write to the CEC bus + */ +struct ec_params_cec_write_v1 { + uint8_t port; + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + /* Set various CEC parameters */ #define EC_CMD_CEC_SET 0x00BA -- cgit v1.2.3 From 1cabf52639d16428bc0d61028dcaf38e29c5f3b5 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:58 +1000 Subject: media: cros-ec-cec: Support multiple ports in MKBP cec_events Use the top four bits of the cec_events MKBP event to store the port number. 
Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- drivers/media/cec/platform/cros-ec/cros-ec-cec.c | 13 +++++++++++-- include/linux/platform_data/cros_ec_commands.h | 10 ++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c index d674a432dfdd..18f78b7e034a 100644 --- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c +++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c @@ -77,8 +77,17 @@ static void handle_cec_message(struct cros_ec_cec *cros_ec_cec) static void handle_cec_event(struct cros_ec_cec *cros_ec_cec) { struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; - uint32_t events = cros_ec->event_data.data.cec_events; - struct cros_ec_cec_port *port = cros_ec_cec->ports[CEC_PORT]; + uint32_t cec_events = cros_ec->event_data.data.cec_events; + uint32_t port_num = EC_MKBP_EVENT_CEC_GET_PORT(cec_events); + uint32_t events = EC_MKBP_EVENT_CEC_GET_EVENTS(cec_events); + struct cros_ec_cec_port *port; + + if (port_num >= cros_ec_cec->num_ports) { + dev_err(cros_ec->dev, + "received CEC event for invalid port %d\n", port_num); + return; + } + port = cros_ec_cec->ports[port_num]; if (events & EC_MKBP_CEC_SEND_OK) cec_transmit_attempt_done(port->adap, CEC_TX_STATUS_OK); diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 9a0c6e28f370..b7e8573a8a49 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4440,6 +4440,16 @@ struct ec_response_i2c_passthru_protect { #define MAX_CEC_MSG_LEN 16 +/* + * Helper macros for packing/unpacking cec_events. 
+ * bits[27:0] : bitmask of events from enum mkbp_cec_event + * bits[31:28]: port number + */ +#define EC_MKBP_EVENT_CEC_PACK(events, port) \ + (((events) & GENMASK(27, 0)) | (((port) & 0xf) << 28)) +#define EC_MKBP_EVENT_CEC_GET_EVENTS(event) ((event) & GENMASK(27, 0)) +#define EC_MKBP_EVENT_CEC_GET_PORT(event) (((event) >> 28) & 0xf) + /* CEC message from the AP to be written on the CEC bus */ #define EC_CMD_CEC_WRITE_MSG 0x00B8 -- cgit v1.2.3 From 425d20518c54bc6d66d733fb117a9a4046932d50 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:43:59 +1000 Subject: media: cros-ec-cec: Support receiving messages from multiple ports Currently, received messages are sent from the EC in the cec_message MKBP event. Since the size of ec_response_get_next_data_v1 is 16 bytes, which is also the maximum size of a CEC message, there is no space to add a port parameter. Increasing the size of ec_response_get_next_data_v1 is an option, but this would increase EC-kernel traffic for all MKBP event types. Instead, use an event to notify that data is ready, and add a new read command to read the data. For backwards compatibility with old EC firmware, continue to handle cec_message events as well. 
Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- drivers/media/cec/platform/cros-ec/cros-ec-cec.c | 59 +++++++++++++++++++++--- include/linux/platform_data/cros_ec_commands.h | 23 +++++++++ 2 files changed, 76 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c index 18f78b7e034a..6989e63c05be 100644 --- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c +++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c @@ -59,19 +59,63 @@ struct cros_ec_cec { struct cros_ec_cec_port *ports[EC_CEC_MAX_PORTS]; }; +static void cros_ec_cec_received_message(struct cros_ec_cec_port *port, + uint8_t *msg, uint8_t len) +{ + if (len > CEC_MAX_MSG_SIZE) + len = CEC_MAX_MSG_SIZE; + + port->rx_msg.len = len; + memcpy(port->rx_msg.msg, msg, len); + + cec_received_msg(port->adap, &port->rx_msg); +} + static void handle_cec_message(struct cros_ec_cec *cros_ec_cec) { struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; uint8_t *cec_message = cros_ec->event_data.data.cec_message; unsigned int len = cros_ec->event_size; - struct cros_ec_cec_port *port = cros_ec_cec->ports[CEC_PORT]; + struct cros_ec_cec_port *port; + /* + * There are two ways of receiving CEC messages: + * 1. Old EC firmware which only supports one port sends the data in a + * cec_message MKBP event. + * 2. New EC firmware which supports multiple ports uses + * EC_MKBP_CEC_HAVE_DATA to notify that data is ready and + * EC_CMD_CEC_READ_MSG to read it. + * Check that the EC only has one CEC port, and then we can assume the + * message is from port 0. 
+ */ + if (cros_ec_cec->num_ports != 1) { + dev_err(cros_ec->dev, + "received cec_message on device with %d ports\n", + cros_ec_cec->num_ports); + return; + } + port = cros_ec_cec->ports[0]; - if (len > CEC_MAX_MSG_SIZE) - len = CEC_MAX_MSG_SIZE; - port->rx_msg.len = len; - memcpy(port->rx_msg.msg, cec_message, len); + cros_ec_cec_received_message(port, cec_message, len); +} - cec_received_msg(port->adap, &port->rx_msg); +static void cros_ec_cec_read_message(struct cros_ec_cec_port *port) +{ + struct cros_ec_device *cros_ec = port->cros_ec_cec->cros_ec; + struct ec_params_cec_read params = { + .port = port->port_num, + }; + struct ec_response_cec_read response; + int ret; + + ret = cros_ec_cmd(cros_ec, 0, EC_CMD_CEC_READ_MSG, &params, + sizeof(params), &response, sizeof(response)); + if (ret < 0) { + dev_err(cros_ec->dev, + "error reading CEC message on EC: %d\n", ret); + return; + } + + cros_ec_cec_received_message(port, response.msg, response.msg_len); } static void handle_cec_event(struct cros_ec_cec *cros_ec_cec) @@ -97,6 +141,9 @@ static void handle_cec_event(struct cros_ec_cec *cros_ec_cec) cec_transmit_attempt_done(port->adap, CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_NACK); + + if (events & EC_MKBP_CEC_HAVE_DATA) + cros_ec_cec_read_message(port); } static int cros_ec_cec_event(struct notifier_block *nb, diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index b7e8573a8a49..ad61c7ff0b28 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4473,6 +4473,27 @@ struct ec_params_cec_write_v1 { uint8_t msg[MAX_CEC_MSG_LEN]; } __ec_align1; +/* CEC message read from a CEC bus reported back to the AP */ +#define EC_CMD_CEC_READ_MSG 0x00B9 + +/** + * struct ec_params_cec_read - Read a message from the CEC bus + * @port: CEC port to read a message on + */ +struct ec_params_cec_read { + uint8_t port; +} __ec_align1; + +/** + * struct ec_response_cec_read 
- Message read from the CEC bus + * @msg_len: length of msg in bytes + * @msg: message content read from the CEC bus + */ +struct ec_response_cec_read { + uint8_t msg_len; + uint8_t msg[MAX_CEC_MSG_LEN]; +} __ec_align1; + /* Set various CEC parameters */ #define EC_CMD_CEC_SET 0x00BA @@ -4529,6 +4550,8 @@ enum mkbp_cec_event { EC_MKBP_CEC_SEND_OK = BIT(0), /* Outgoing message was not acknowledged */ EC_MKBP_CEC_SEND_FAILED = BIT(1), + /* Incoming message can be read out by AP */ + EC_MKBP_CEC_HAVE_DATA = BIT(2), }; /*****************************************************************************/ -- cgit v1.2.3 From 5d227f02ceb9cc120cf04efbd77e12da182a5f62 Mon Sep 17 00:00:00 2001 From: Reka Norman Date: Fri, 25 Aug 2023 12:44:01 +1000 Subject: media: cros-ec-cec: Get number of CEC ports from EC Add a new CEC port count host command and use it to query the number of CEC ports from the EC. If the host command is not supported then it must be old EC firmware which only supports one port, so fall back to assuming one port. This patch completes support for multiple ports in cros-ec-cec. 
Signed-off-by: Reka Norman Signed-off-by: Hans Verkuil --- drivers/media/cec/platform/cros-ec/cros-ec-cec.c | 40 +++++++++++++++++++++--- include/linux/platform_data/cros_ec_commands.h | 11 +++++++ 2 files changed, 46 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c index 371699d599de..993deb85d3e2 100644 --- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c +++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c @@ -21,10 +21,6 @@ #define DRV_NAME "cros-ec-cec" -/* Only one port is supported for now */ -#define CEC_NUM_PORTS 1 -#define CEC_PORT 0 - /** * struct cros_ec_cec_port - Driver data for a single EC CEC port * @@ -358,6 +354,38 @@ static struct device *cros_ec_cec_find_hdmi_dev(struct device *dev, #endif +static int cros_ec_cec_get_num_ports(struct cros_ec_cec *cros_ec_cec) +{ + struct ec_response_cec_port_count response; + int ret; + + ret = cros_ec_cmd(cros_ec_cec->cros_ec, 0, EC_CMD_CEC_PORT_COUNT, NULL, + 0, &response, sizeof(response)); + if (ret < 0) { + /* + * Old EC firmware only supports one port and does not support + * the port count command, so fall back to assuming one port. 
+ */ + cros_ec_cec->num_ports = 1; + return 0; + } + + if (response.port_count == 0) { + dev_err(cros_ec_cec->cros_ec->dev, + "EC reports 0 CEC ports\n"); + return -ENODEV; + } + + if (response.port_count > EC_CEC_MAX_PORTS) { + dev_err(cros_ec_cec->cros_ec->dev, + "EC reports too many ports: %d\n", response.port_count); + return -EINVAL; + } + + cros_ec_cec->num_ports = response.port_count; + return 0; +} + static int cros_ec_cec_get_write_cmd_version(struct cros_ec_cec *cros_ec_cec) { struct cros_ec_device *cros_ec = cros_ec_cec->cros_ec; @@ -463,7 +491,9 @@ static int cros_ec_cec_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - cros_ec_cec->num_ports = CEC_NUM_PORTS; + ret = cros_ec_cec_get_num_ports(cros_ec_cec); + if (ret) + return ret; ret = cros_ec_cec_get_write_cmd_version(cros_ec_cec); if (ret) diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ad61c7ff0b28..7dae17b62a4d 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4536,6 +4536,17 @@ struct ec_response_cec_get { uint8_t val; } __ec_align1; +/* Get the number of CEC ports */ +#define EC_CMD_CEC_PORT_COUNT 0x00C1 + +/** + * struct ec_response_cec_port_count - CEC port count response + * @port_count: number of CEC ports + */ +struct ec_response_cec_port_count { + uint8_t port_count; +} __ec_align1; + /* CEC parameters command */ enum cec_command { /* CEC reading, writing and events enable */ -- cgit v1.2.3 From 24775700eaa93ff83b2a0f1e005879cdf186cdd9 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Tue, 26 Sep 2023 05:19:32 +0000 Subject: x86/amd_nb: Add AMD Family MI300 PCI IDs Add new Root, Device 18h Function 3, and Function 4 PCI IDS for AMD F19h Model 90h-9fh (MI300A). 
Signed-off-by: Muralidhara M K Signed-off-by: Suma Hegde Signed-off-by: Ingo Molnar Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20230926051932.193239-1-suma.hegde@amd.com --- arch/x86/kernel/amd_nb.c | 5 +++++ include/linux/pci_ids.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 356de955e78d..10c2a3c9114e 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -27,6 +27,7 @@ #define PCI_DEVICE_ID_AMD_1AH_M00H_ROOT 0x153a #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 #define PCI_DEVICE_ID_AMD_MI200_ROOT 0x14bb +#define PCI_DEVICE_ID_AMD_MI300_ROOT 0x14f8 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec @@ -43,6 +44,7 @@ #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4 #define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 +#define PCI_DEVICE_ID_AMD_MI300_DF_F4 0x152c /* Protect the PCI config register pairs used for SMN. 
*/ static DEFINE_MUTEX(smn_mutex); @@ -62,6 +64,7 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_ROOT) }, {} }; @@ -93,6 +96,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F3) }, {} }; @@ -115,6 +119,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI300_DF_F4) }, {} }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5fb3d4c393a9..91b457de262e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -579,6 +579,7 @@ #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 +#define PCI_DEVICE_ID_AMD_MI300_DF_F3 0x152b #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From 2d5780bbef8dbe6375d481cbea212606a80e4453 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 26 Sep 2023 20:55:56 +0200 Subject: swiotlb: fix the check whether a device has used software IO TLB When CONFIG_SWIOTLB_DYNAMIC=y, devices which do not use the software IO TLB can avoid swiotlb lookup. 
A flag is added by commit 1395706a1490 ("swiotlb: search the software IO TLB only if the device makes use of it"), the flag is correctly set, but it is then never checked. Add the actual check here. Note that this code is an alternative to the default pool check, not an additional check, because: 1. swiotlb_find_pool() also searches the default pool; 2. if dma_uses_io_tlb is false, the default swiotlb pool is not used. Tested in a KVM guest against a QEMU RAM-backed SATA disk over virtio and *not* using software IO TLB, this patch increases IOPS by approx 2% for 4-way parallel I/O. The write memory barrier in swiotlb_dyn_alloc() is not needed, because a newly allocated pool must always be observed by swiotlb_find_slots() before an address from that pool is passed to is_swiotlb_buffer(). Correctness was verified using the following litmus test: C swiotlb-new-pool (* * Result: Never * * Check that a newly allocated pool is always visible when the * corresponding swiotlb buffer is visible. *) { mem_pools = default; } P0(int **mem_pools, int *pool) { /* add_mem_pool() */ WRITE_ONCE(*pool, 999); rcu_assign_pointer(*mem_pools, pool); } P1(int **mem_pools, int *flag, int *buf) { /* swiotlb_find_slots() */ int *r0; int r1; rcu_read_lock(); r0 = READ_ONCE(*mem_pools); r1 = READ_ONCE(*r0); rcu_read_unlock(); if (r1) { WRITE_ONCE(*flag, 1); smp_mb(); } /* device driver (presumed) */ WRITE_ONCE(*buf, r1); } P2(int **mem_pools, int *flag, int *buf) { /* device driver (presumed) */ int r0 = READ_ONCE(*buf); /* is_swiotlb_buffer() */ int r1; int *r2; int r3; smp_rmb(); r1 = READ_ONCE(*flag); if (r1) { /* swiotlb_find_pool() */ rcu_read_lock(); r2 = READ_ONCE(*mem_pools); r3 = READ_ONCE(*r2); rcu_read_unlock(); } } exists (2:r0<>0 /\ 2:r3=0) (* Not found. 
*) Fixes: 1395706a1490 ("swiotlb: search the software IO TLB only if the device makes use of it") Reported-by: Jonathan Corbet Closes: https://lore.kernel.org/linux-iommu/87a5uz3ob8.fsf@meer.lwn.net/ Signed-off-by: Petr Tesarik Reviewed-by: Catalin Marinas Signed-off-by: Christoph Hellwig --- include/linux/swiotlb.h | 23 ++++++++++++++++------- kernel/dma/swiotlb.c | 26 ++++++++++++++++++++------ 2 files changed, 36 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b4536626f8ff..ecde0312dd52 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -172,14 +172,23 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) if (!mem) return false; - if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) { - /* Pairs with smp_wmb() in swiotlb_find_slots() and - * swiotlb_dyn_alloc(), which modify the RCU lists. - */ - smp_rmb(); - return swiotlb_find_pool(dev, paddr); - } +#ifdef CONFIG_SWIOTLB_DYNAMIC + /* + * All SWIOTLB buffer addresses must have been returned by + * swiotlb_tbl_map_single() and passed to a device driver. + * If a SWIOTLB address is checked on another CPU, then it was + * presumably loaded by the device driver from an unspecified private + * data structure. Make sure that this load is ordered before reading + * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool(). + * + * This barrier pairs with smp_mb() in swiotlb_find_slots(). 
+ */ + smp_rmb(); + return READ_ONCE(dev->dma_uses_io_tlb) && + swiotlb_find_pool(dev, paddr); +#else return paddr >= mem->defpool.start && paddr < mem->defpool.end; +#endif } static inline bool is_swiotlb_force_bounce(struct device *dev) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 85dd94323b98..01637677736f 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -728,9 +728,6 @@ static void swiotlb_dyn_alloc(struct work_struct *work) } add_mem_pool(mem, pool); - - /* Pairs with smp_rmb() in is_swiotlb_buffer(). */ - smp_wmb(); } /** @@ -1151,9 +1148,26 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); found: - dev->dma_uses_io_tlb = true; - /* Pairs with smp_rmb() in is_swiotlb_buffer() */ - smp_wmb(); + WRITE_ONCE(dev->dma_uses_io_tlb, true); + + /* + * The general barrier orders reads and writes against a presumed store + * of the SWIOTLB buffer address by a device driver (to a driver private + * data structure). It serves two purposes. + * + * First, the store to dev->dma_uses_io_tlb must be ordered before the + * presumed store. This guarantees that the returned buffer address + * cannot be passed to another CPU before updating dev->dma_uses_io_tlb. + * + * Second, the load from mem->pools must be ordered before the same + * presumed store. This guarantees that the returned buffer address + * cannot be observed by another CPU before an update of the RCU list + * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy + * atomicity). + * + * See also the comment in is_swiotlb_buffer(). 
+ */ + smp_mb(); *retpool = pool; return index; -- cgit v1.2.3 From 2e2b547950bc09e75afe912f9683be39c2195d9d Mon Sep 17 00:00:00 2001 From: Wenchao Chen Date: Tue, 19 Sep 2023 15:47:06 +0800 Subject: mmc: core: Allow dynamical updates of the number of requests for hsq To allow dynamical updates of the current number of used in-flight requests, let's move away from using a hard-coded value to using a corresponding variable in the struct mmc_host. This can be valuable when optimizing for certain I/O request sequences, as shown by subsequent changes. Signed-off-by: Wenchao Chen Link: https://lore.kernel.org/r/20230919074707.25517-2-wenchao.chen@unisoc.com [Ulf: Re-wrote the commitmsg to clarify the change] Signed-off-by: Ulf Hansson --- drivers/mmc/core/queue.c | 6 +----- drivers/mmc/host/mmc_hsq.c | 1 + drivers/mmc/host/mmc_hsq.h | 6 ++++++ include/linux/mmc/host.h | 1 + 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index b396e3900717..a0a2412f62a7 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -260,11 +260,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, } break; case MMC_ISSUE_ASYNC: - /* - * For MMC host software queue, we only allow 2 requests in - * flight to avoid a long latency. 
- */ - if (host->hsq_enabled && mq->in_flight[issue_type] > 2) { + if (host->hsq_enabled && mq->in_flight[issue_type] > host->hsq_depth) { spin_unlock_irq(&mq->lock); return BLK_STS_RESOURCE; } diff --git a/drivers/mmc/host/mmc_hsq.c b/drivers/mmc/host/mmc_hsq.c index 424dc7b07858..8556cacb21a1 100644 --- a/drivers/mmc/host/mmc_hsq.c +++ b/drivers/mmc/host/mmc_hsq.c @@ -337,6 +337,7 @@ int mmc_hsq_init(struct mmc_hsq *hsq, struct mmc_host *mmc) hsq->mmc = mmc; hsq->mmc->cqe_private = hsq; mmc->cqe_ops = &mmc_hsq_ops; + mmc->hsq_depth = HSQ_NORMAL_DEPTH; for (i = 0; i < HSQ_NUM_SLOTS; i++) hsq->tag_slot[i] = HSQ_INVALID_TAG; diff --git a/drivers/mmc/host/mmc_hsq.h b/drivers/mmc/host/mmc_hsq.h index 1808024fc6c5..aa5c4543b55f 100644 --- a/drivers/mmc/host/mmc_hsq.h +++ b/drivers/mmc/host/mmc_hsq.h @@ -5,6 +5,12 @@ #define HSQ_NUM_SLOTS 64 #define HSQ_INVALID_TAG HSQ_NUM_SLOTS +/* + * For MMC host software queue, we only allow 2 requests in + * flight to avoid a long latency. + */ +#define HSQ_NORMAL_DEPTH 2 + struct hsq_slot { struct mmc_request *mrq; }; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 62a6847a3b6f..2f445c651742 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -526,6 +526,7 @@ struct mmc_host { /* Host Software Queue support */ bool hsq_enabled; + int hsq_depth; u32 err_stats[MMC_ERR_MAX]; unsigned long private[] ____cacheline_aligned; -- cgit v1.2.3 From 1a6a464774947920dcedcf7409be62495c7cedd0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 12 Sep 2023 12:44:06 +0200 Subject: timers: Tag (hr)timer softirq as hotplug safe Specific stress involving frequent CPU-hotplug operations, such as running rcutorture for example, may trigger the following message: NOHZ tick-stop error: local softirq work is pending, handler #02!!!" 
This happens in the CPU-down hotplug process, after CPUHP_AP_SMPBOOT_THREADS whose teardown callback parks ksoftirqd, and before the target CPU shuts down through CPUHP_AP_IDLE_DEAD. In this fragile intermediate state, softirqs waiting for threaded handling may be forever ignored and eventually reported by the idle task as in the above example. However some vectors are known to be safe as long as the corresponding subsystems have teardown callbacks handling the migration of their events. The above error message reports pending timers softirq although this vector can be considered as hotplug safe because the CPUHP_TIMERS_PREPARE teardown callback performs the necessary migration of timers after the death of the CPU. Hrtimers also have a similar hotplug handling. Therefore this error message, as far as (hr-)timers are concerned, can be considered spurious and the relevant softirq vectors can be marked as hotplug safe. Fixes: 0345691b24c0 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle") Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Reviewed-by: Joel Fernandes (Google) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230912104406.312185-6-frederic@kernel.org --- include/linux/interrupt.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a92bce40b04b..4a1dc88ddbff 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -569,8 +569,12 @@ enum * 2) rcu_report_dead() reports the final quiescent states. * * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue + * + * _ (HR)TIMER_SOFTIRQ: (hr)timers_dead_cpu() migrates the queue */ -#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(TIMER_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ) |\ + BIT(HRTIMER_SOFTIRQ) | BIT(RCU_SOFTIRQ)) + /* map softirq index to softirq name. 
update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. -- cgit v1.2.3 From c02a427f7b64ed5b840a0720a6cee5a17a1e7e07 Mon Sep 17 00:00:00 2001 From: Xueshi Hu Date: Tue, 12 Sep 2023 12:44:05 +0200 Subject: tick/nohz: Remove unused tick_nohz_idle_stop_tick_protected() All the callers have been removed since commit 336f560a8917 ("x86/xen: don't let xen_pv_play_dead() return") Signed-off-by: Xueshi Hu Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230912104406.312185-5-frederic@kernel.org --- include/linux/tick.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 9459fef5b857..716d17f31c45 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -140,14 +140,6 @@ extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); - -static inline void tick_nohz_idle_stop_tick_protected(void) -{ - local_irq_disable(); - tick_nohz_idle_stop_tick(); - local_irq_enable(); -} - #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } @@ -170,8 +162,6 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } - -static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From 6260ecd04594360ae2af104fb2641317728a66e4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:27 -0700 Subject: irqdomain: Annotate struct irq_domain with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by 
attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct irq_domain. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20230922175127.work.214-kees@kernel.org --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 51c254b7fec2..ee0a82c60508 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -174,7 +174,7 @@ struct irq_domain { irq_hw_number_t hwirq_max; unsigned int revmap_size; struct radix_tree_root revmap_tree; - struct irq_data __rcu *revmap[]; + struct irq_data __rcu *revmap[] __counted_by(revmap_size); }; /* Irq domain flags */ -- cgit v1.2.3 From d069ed6b752f91cea6341a9c60be42837678a7f5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Sep 2023 20:01:43 +0200 Subject: thermal: core: Allow trip pointers to be used for cooling device binding Add new helper functions, thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip(), to allow a trip pointer to be used for binding a cooling device to a trip point and unbinding it, respectively, and redefine the existing helpers, thermal_zone_bind_cooling_device() and thermal_zone_unbind_cooling_device(), as wrappers around the new ones, respectively. No intentional functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Daniel Lezcano --- drivers/thermal/thermal_core.c | 54 ++++++++++++++++++++++++++---------------- include/linux/thermal.h | 8 +++++++ 2 files changed, 42 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 31fe14a96d13..45d0aa0b69b7 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -600,10 +600,9 @@ struct thermal_zone_device *thermal_zone_get_by_id(int id) */ /** - * thermal_zone_bind_cooling_device() - bind a cooling device to a thermal zone + * thermal_bind_cdev_to_trip - bind a cooling device to a thermal zone * @tz: pointer to struct thermal_zone_device - * @trip_index: indicates which trip point the cooling devices is - * associated with in this thermal zone. + * @trip: trip point the cooling devices is associated with in this zone. * @cdev: pointer to struct thermal_cooling_device * @upper: the Maximum cooling state for this trip point. * THERMAL_NO_LIMIT means no upper limit, @@ -621,8 +620,8 @@ struct thermal_zone_device *thermal_zone_get_by_id(int id) * * Return: 0 on success, the proper error value otherwise. 
*/ -int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, - int trip_index, +int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, struct thermal_cooling_device *cdev, unsigned long upper, unsigned long lower, unsigned int weight) @@ -631,15 +630,9 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, struct thermal_instance *pos; struct thermal_zone_device *pos1; struct thermal_cooling_device *pos2; - const struct thermal_trip *trip; bool upper_no_limit; int result; - if (trip_index >= tz->num_trips || trip_index < 0) - return -EINVAL; - - trip = &tz->trips[trip_index]; - list_for_each_entry(pos1, &thermal_tz_list, node) { if (pos1 == tz) break; @@ -736,14 +729,26 @@ free_mem: kfree(dev); return result; } +EXPORT_SYMBOL_GPL(thermal_bind_cdev_to_trip); + +int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, + int trip_index, + struct thermal_cooling_device *cdev, + unsigned long upper, unsigned long lower, + unsigned int weight) +{ + if (trip_index < 0 || trip_index >= tz->num_trips) + return -EINVAL; + + return thermal_bind_cdev_to_trip(tz, &tz->trips[trip_index], cdev, + upper, lower, weight); +} EXPORT_SYMBOL_GPL(thermal_zone_bind_cooling_device); /** - * thermal_zone_unbind_cooling_device() - unbind a cooling device from a - * thermal zone. + * thermal_unbind_cdev_from_trip - unbind a cooling device from a thermal zone. * @tz: pointer to a struct thermal_zone_device. - * @trip_index: indicates which trip point the cooling devices is - * associated with in this thermal zone. + * @trip: trip point the cooling devices is associated with in this zone. * @cdev: pointer to a struct thermal_cooling_device. * * This interface function unbind a thermal cooling device from the certain @@ -752,16 +757,14 @@ EXPORT_SYMBOL_GPL(thermal_zone_bind_cooling_device); * * Return: 0 on success, the proper error value otherwise. 
*/ -int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, - int trip_index, - struct thermal_cooling_device *cdev) +int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev) { struct thermal_instance *pos, *next; - const struct thermal_trip *trip; mutex_lock(&tz->lock); mutex_lock(&cdev->lock); - trip = &tz->trips[trip_index]; list_for_each_entry_safe(pos, next, &tz->thermal_instances, tz_node) { if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) { list_del(&pos->tz_node); @@ -784,6 +787,17 @@ unbind: kfree(pos); return 0; } +EXPORT_SYMBOL_GPL(thermal_unbind_cdev_from_trip); + +int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, + int trip_index, + struct thermal_cooling_device *cdev) +{ + if (trip_index < 0 || trip_index >= tz->num_trips) + return -EINVAL; + + return thermal_unbind_cdev_from_trip(tz, &tz->trips[trip_index], cdev); +} EXPORT_SYMBOL_GPL(thermal_zone_unbind_cooling_device); static void thermal_release(struct device *dev) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6cfcae22ba12..6710a4ace992 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -320,10 +320,18 @@ const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); +int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + unsigned long upper, unsigned long lower, + unsigned int weight); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, unsigned long, unsigned long, unsigned int); +int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev); int 
thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, -- cgit v1.2.3 From 5aa4c9608d2d5fea29e211a80c29696f7d94e9f7 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 11 Sep 2023 12:38:48 +0300 Subject: net/mlx5: Introduce ifc bits for migration in a chunk mode Introduce ifc related stuff to enable migration in a chunk mode. Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20230911093856.81910-2-yishaih@nvidia.com Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fc3db401f8a2..3265bfcb3156 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1948,7 +1948,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_c0[0x8]; u8 migration_multi_load[0x1]; u8 migration_tracking_state[0x1]; - u8 reserved_at_ca[0x16]; + u8 reserved_at_ca[0x6]; + u8 migration_in_chunks[0x1]; + u8 reserved_at_d1[0xf]; u8 reserved_at_e0[0xc0]; @@ -12392,7 +12394,8 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits { u8 op_mod[0x10]; u8 incremental[0x1]; - u8 reserved_at_41[0xf]; + u8 chunk[0x1]; + u8 reserved_at_42[0xe]; u8 vhca_id[0x10]; u8 reserved_at_60[0x20]; @@ -12408,7 +12411,11 @@ struct mlx5_ifc_query_vhca_migration_state_out_bits { u8 required_umem_size[0x20]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x20]; + + u8 remaining_total_size[0x40]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_save_vhca_state_in_bits { @@ -12440,7 +12447,7 @@ struct mlx5_ifc_save_vhca_state_out_bits { u8 actual_image_size[0x20]; - u8 reserved_at_60[0x20]; + u8 next_required_umem_size[0x20]; }; struct mlx5_ifc_load_vhca_state_in_bits { -- cgit v1.2.3 From fb99ef17865035a6657786d4b2af11a27ba23f9b Mon Sep 17 00:00:00 2001 
From: Damien Le Moal Date: Fri, 25 Aug 2023 15:41:14 +0900 Subject: ata: libata-scsi: link ata port and scsi device There is no direct device ancestry defined between an ata_device and its scsi device which prevents the power management code from correctly ordering suspend and resume operations. Create such ancestry with the ata device as the parent to ensure that the scsi device (child) is suspended before the ata device and that resume handles the ata device before the scsi device. The parent-child (supplier-consumer) relationship is established between the ata_port (parent) and the scsi device (child) with the function device_link_add(). The parent used is not the ata_device as the PM operations are defined per port and the status of all devices connected through that port is controlled from the port operations. The device link is established with the new function ata_scsi_slave_alloc(), and this function is used to define the ->slave_alloc callback of the scsi host template of all ata drivers. Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Niklas Cassel Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen Reviewed-by: John Garry --- drivers/ata/libata-scsi.c | 45 ++++++++++++++++++++++++++++++++++++++++----- include/linux/libata.h | 2 ++ 2 files changed, 42 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index fb73c145b49a..8b43290ca2cd 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1089,6 +1089,42 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) return 0; } +/** + * ata_scsi_slave_alloc - Early setup of SCSI device + * @sdev: SCSI device to examine + * + * This is called from scsi_alloc_sdev() when the scsi device + * associated with an ATA device is scanned on a port.
+ * + * LOCKING: + * Defined by SCSI layer. We don't really care. + */ + +int ata_scsi_slave_alloc(struct scsi_device *sdev) +{ + struct ata_port *ap = ata_shost_to_port(sdev->host); + struct device_link *link; + + ata_scsi_sdev_config(sdev); + + /* + * Create a link from the ata_port device to the scsi device to ensure + * that PM does suspend/resume in the correct order: the scsi device is + * consumer (child) and the ata port the supplier (parent). + */ + link = device_link_add(&sdev->sdev_gendev, &ap->tdev, + DL_FLAG_STATELESS | + DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE); + if (!link) { + ata_port_err(ap, "Failed to create link to scsi device %s\n", + dev_name(&sdev->sdev_gendev)); + return -ENODEV; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ata_scsi_slave_alloc); + /** * ata_scsi_slave_config - Set SCSI device attributes * @sdev: SCSI device to examine @@ -1105,14 +1141,11 @@ int ata_scsi_slave_config(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct ata_device *dev = __ata_scsi_find_dev(ap, sdev); - int rc = 0; - - ata_scsi_sdev_config(sdev); if (dev) - rc = ata_scsi_dev_config(sdev, dev); + return ata_scsi_dev_config(sdev, dev); - return rc; + return 0; } EXPORT_SYMBOL_GPL(ata_scsi_slave_config); @@ -1136,6 +1169,8 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev) unsigned long flags; struct ata_device *dev; + device_link_remove(&sdev->sdev_gendev, &ap->tdev); + spin_lock_irqsave(ap->lock, flags); dev = __ata_scsi_find_dev(ap, sdev); if (dev && dev->sdev) { diff --git a/include/linux/libata.h b/include/linux/libata.h index 84aca8c44fa3..3ce1ab408114 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1148,6 +1148,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); +extern int ata_scsi_slave_alloc(struct scsi_device *sdev); extern int ata_scsi_slave_config(struct 
scsi_device *sdev); extern void ata_scsi_slave_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, @@ -1396,6 +1397,7 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ .proc_name = drv_name, \ + .slave_alloc = ata_scsi_slave_alloc, \ .slave_destroy = ata_scsi_slave_destroy, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ -- cgit v1.2.3 From aa3998dbeb3abce63653b7f6d4542e7dcd022590 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 26 Aug 2023 09:43:39 +0900 Subject: ata: libata-scsi: Disable scsi device manage_system_start_stop The introduction of a device link to create a consumer/supplier relationship between the scsi device of an ATA device and the ATA port of that ATA device fixes the ordering of system suspend and resume operations. For suspend, the scsi device is suspended first and the ata port after it. This is fine as this allows the synchronize cache and START STOP UNIT commands issued by the scsi disk driver to be executed before the ata port is disabled. For resume operations, the ata port is resumed first, followed by the scsi device. This allows having the request queue of the scsi device to be unfrozen after the ata port resume is scheduled in EH, thus avoiding to see new requests prematurely issued to the ATA device. Since libata sets manage_system_start_stop to 1, the scsi disk resume operation also results in issuing a START STOP UNIT command to the device being resumed so that the device exits standby power mode. 
However, restoring the ATA device to the active power mode must be synchronized with libata EH processing of the port resume operation to avoid either 1) seeing the start stop unit command being received too early when the port is not yet resumed and ready to accept commands, or 2) seeing it received after the port resume process issues commands such as IDENTIFY to revalidate the device. In this last case, the risk is that the device revalidation fails with timeout errors as the drive is still spun down. Commit 0a8589055936 ("ata,scsi: do not issue START STOP UNIT on resume") disabled issuing the START STOP UNIT command to avoid issues with it. But this is incorrect as transitioning a device to the active power mode from the standby power mode set on suspend requires a media access command. The IDENTIFY, READ LOG and SET FEATURES commands executed in libata EH context triggered by the ata port resume operation may thus fail. Fix these synchronization issues by handling device power mode transitions for system suspend and resume directly in libata EH context, without relying on the scsi disk driver management triggered with the manage_system_start_stop flag. To do this, the following libata helper functions are introduced: 1) ata_dev_power_set_standby(): This function issues a STANDBY IMMEDIATE command to transition a device to the standby power mode. For HDDs, this spins down the disks. This function applies only to ATA and ZAC devices and does nothing otherwise. This function also does nothing for devices that have the ATA_FLAG_NO_POWEROFF_SPINDOWN or ATA_FLAG_NO_HIBERNATE_SPINDOWN flag set. For suspend, call ata_dev_power_set_standby() in ata_eh_handle_port_suspend() before the port is disabled and frozen. ata_eh_unload() is also modified to transition all enabled devices to the standby power mode when the system is shut down or devices are removed.
2) ata_dev_power_set_active(): This function applies to ATA or ZAC devices and issues a VERIFY command for 1 sector at LBA 0 to transition the device to the active power mode. For HDDs, this function completes only once the disk has spun up, so its execution uses the same timeouts as for reset, to give the drive enough time to complete spinup without triggering a command timeout. For resume, call ata_dev_power_set_active() in ata_eh_revalidate_and_attach() after the port has been enabled and before any other command is issued to the device. With these changes, the manage_system_start_stop and no_start_on_resume scsi device flags do not need to be set in ata_scsi_dev_config(). The flag manage_runtime_start_stop is still set to allow the sd driver to spinup/spindown a disk through the sd runtime operations. Fixes: 0a8589055936 ("ata,scsi: do not issue START STOP UNIT on resume") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. Petersen --- drivers/ata/libata-core.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/ata/libata-eh.c | 46 +++++++++++++++++++++++- drivers/ata/libata-scsi.c | 16 ++++----- drivers/ata/libata.h | 2 ++ include/linux/libata.h | 6 ++-- 5 files changed, 148 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8e35afe5e560..a0bc01606b30 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1972,6 +1972,96 @@ retry: return rc; } +/** + * ata_dev_power_set_standby - Set a device power mode to standby + * @dev: target device + * + * Issue a STANDBY IMMEDIATE command to set a device power mode to standby. + * For an HDD device, this spins down the disks. + * + * LOCKING: + * Kernel thread context (may sleep).
+ */ +void ata_dev_power_set_standby(struct ata_device *dev) +{ + unsigned long ap_flags = dev->link->ap->flags; + struct ata_taskfile tf; + unsigned int err_mask; + + /* Issue STANDBY IMMEDIATE command only if supported by the device */ + if (dev->class != ATA_DEV_ATA && dev->class != ATA_DEV_ZAC) + return; + + /* + * Some odd clown BIOSes issue spindown on power off (ACPI S4 or S5) + * causing some drives to spin up and down again. For these, do nothing + * if we are being called on shutdown. + */ + if ((ap_flags & ATA_FLAG_NO_POWEROFF_SPINDOWN) && + system_state == SYSTEM_POWER_OFF) + return; + + if ((ap_flags & ATA_FLAG_NO_HIBERNATE_SPINDOWN) && + system_entering_hibernation()) + return; + + ata_tf_init(dev, &tf); + tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf.protocol = ATA_PROT_NODATA; + tf.command = ATA_CMD_STANDBYNOW1; + + ata_dev_notice(dev, "Entering standby power mode\n"); + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (err_mask) + ata_dev_err(dev, "STANDBY IMMEDIATE failed (err_mask=0x%x)\n", + err_mask); +} + +/** + * ata_dev_power_set_active - Set a device power mode to active + * @dev: target device + * + * Issue a VERIFY command to enter to ensure that the device is in the + * active power mode. For a spun-down HDD (standby or idle power mode), + * the VERIFY command will complete after the disk spins up. + * + * LOCKING: + * Kernel thread context (may sleep). + */ +void ata_dev_power_set_active(struct ata_device *dev) +{ + struct ata_taskfile tf; + unsigned int err_mask; + + /* + * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only + * if supported by the device. 
+ */ + if (dev->class != ATA_DEV_ATA && dev->class != ATA_DEV_ZAC) + return; + + ata_tf_init(dev, &tf); + tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf.protocol = ATA_PROT_NODATA; + tf.command = ATA_CMD_VERIFY; + tf.nsect = 1; + if (dev->flags & ATA_DFLAG_LBA) { + tf.flags |= ATA_TFLAG_LBA; + tf.device |= ATA_LBA; + } else { + /* CHS */ + tf.lbal = 0x1; /* sect */ + } + + ata_dev_notice(dev, "Entering active power mode\n"); + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (err_mask) + ata_dev_err(dev, "VERIFY failed (err_mask=0x%x)\n", + err_mask); +} + /** * ata_read_log_page - read a specific log page * @dev: target device diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 4cf4f57e57b8..b1b2c276371e 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -147,6 +147,8 @@ ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT), .timeouts = ata_eh_flush_timeouts }, + { .commands = CMDS(ATA_CMD_VERIFY), + .timeouts = ata_eh_reset_timeouts }, }; #undef CMDS @@ -498,7 +500,19 @@ static void ata_eh_unload(struct ata_port *ap) struct ata_device *dev; unsigned long flags; - /* Restore SControl IPM and SPD for the next driver and + /* + * Unless we are restarting, transition all enabled devices to + * standby power mode. + */ + if (system_state != SYSTEM_RESTART) { + ata_for_each_link(link, ap, PMP_FIRST) { + ata_for_each_dev(dev, link, ENABLED) + ata_dev_power_set_standby(dev); + } + } + + /* + * Restore SControl IPM and SPD for the next driver and * disable attached devices. 
*/ ata_for_each_link(link, ap, PMP_FIRST) { @@ -684,6 +698,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ehc->saved_xfer_mode[devno] = dev->xfer_mode; if (ata_ncq_enabled(dev)) ehc->saved_ncq_enabled |= 1 << devno; + + /* If we are resuming, wake up the device */ + if (ap->pflags & ATA_PFLAG_RESUMING) + ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; } } @@ -743,6 +761,8 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) /* clean up */ spin_lock_irqsave(ap->lock, flags); + ap->pflags &= ~ATA_PFLAG_RESUMING; + if (ap->pflags & ATA_PFLAG_LOADING) ap->pflags &= ~ATA_PFLAG_LOADING; else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) && @@ -1218,6 +1238,13 @@ void ata_eh_detach_dev(struct ata_device *dev) struct ata_eh_context *ehc = &link->eh_context; unsigned long flags; + /* + * If the device is still enabled, transition it to standby power mode + * (i.e. spin down HDDs). + */ + if (ata_dev_enabled(dev)) + ata_dev_power_set_standby(dev); + ata_dev_disable(dev); spin_lock_irqsave(ap->lock, flags); @@ -3016,6 +3043,15 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, if (ehc->i.flags & ATA_EHI_DID_RESET) readid_flags |= ATA_READID_POSTRESET; + /* + * When resuming, before executing any command, make sure to + * transition the device to the active power mode. + */ + if ((action & ATA_EH_SET_ACTIVE) && ata_dev_enabled(dev)) { + ata_dev_power_set_active(dev); + ata_eh_done(link, dev, ATA_EH_SET_ACTIVE); + } + if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { WARN_ON(dev->class == ATA_DEV_PMP); @@ -3989,6 +4025,7 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap) unsigned long flags; int rc = 0; struct ata_device *dev; + struct ata_link *link; /* are we suspending? 
*/ spin_lock_irqsave(ap->lock, flags); @@ -4001,6 +4038,12 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap) WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); + /* Set all devices attached to the port in standby mode */ + ata_for_each_link(link, ap, HOST_FIRST) { + ata_for_each_dev(dev, link, ENABLED) + ata_dev_power_set_standby(dev); + } + /* * If we have a ZPODD attached, check its zero * power ready status before the port is frozen. @@ -4083,6 +4126,7 @@ static void ata_eh_handle_port_resume(struct ata_port *ap) /* update the flags */ spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); + ap->pflags |= ATA_PFLAG_RESUMING; spin_unlock_irqrestore(ap->lock, flags); } #endif /* CONFIG_PM */ diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 73428ad0c8d2..a0e58d22d222 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1050,15 +1050,13 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) } } else { sdev->sector_size = ata_id_logical_sector_size(dev->id); + /* - * Stop the drive on suspend but do not issue START STOP UNIT - * on resume as this is not necessary and may fail: the device - * will be woken up by ata_port_pm_resume() with a port reset - * and device revalidation. + * Ask the sd driver to issue START STOP UNIT on runtime suspend + * and resume only. For system level suspend/resume, devices + * power state is handled directly by libata EH. 
*/ - sdev->manage_system_start_stop = true; sdev->manage_runtime_start_stop = true; - sdev->no_start_on_resume = 1; } /* @@ -1231,7 +1229,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) } if (cdb[4] & 0x1) { - tf->nsect = 1; /* 1 sector, lba=0 */ + tf->nsect = 1; /* 1 sector, lba=0 */ if (qc->dev->flags & ATA_DFLAG_LBA) { tf->flags |= ATA_TFLAG_LBA; @@ -1247,7 +1245,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) tf->lbah = 0x0; /* cyl high */ } - tf->command = ATA_CMD_VERIFY; /* READ VERIFY */ + tf->command = ATA_CMD_VERIFY; /* READ VERIFY */ } else { /* Some odd clown BIOSen issue spindown on power off (ACPI S4 * or S5) causing some drives to spin up and down again. @@ -1257,7 +1255,7 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) goto skip; if ((qc->ap->flags & ATA_FLAG_NO_HIBERNATE_SPINDOWN) && - system_entering_hibernation()) + system_entering_hibernation()) goto skip; /* Issue ATA STANDBY IMMEDIATE command */ diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 6e7d352803bd..820299bd9d06 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -60,6 +60,8 @@ extern int ata_dev_reread_id(struct ata_device *dev, unsigned int readid_flags); extern int ata_dev_revalidate(struct ata_device *dev, unsigned int new_class, unsigned int readid_flags); extern int ata_dev_configure(struct ata_device *dev); +extern void ata_dev_power_set_standby(struct ata_device *dev); +extern void ata_dev_power_set_active(struct ata_device *dev); extern int sata_down_spd_limit(struct ata_link *link, u32 spd_limit); extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel); extern unsigned int ata_dev_set_feature(struct ata_device *dev, diff --git a/include/linux/libata.h b/include/linux/libata.h index 3ce1ab408114..2a7d2af0ed80 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -192,6 +192,7 @@ enum { ATA_PFLAG_UNLOADING = (1 << 9), /* driver is 
being unloaded */ ATA_PFLAG_UNLOADED = (1 << 10), /* driver is unloaded */ + ATA_PFLAG_RESUMING = (1 << 16), /* port is being resumed */ ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */ ATA_PFLAG_INIT_GTM_VALID = (1 << 19), /* initial gtm data valid */ @@ -318,9 +319,10 @@ enum { ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ ATA_EH_GET_SUCCESS_SENSE = (1 << 6), /* Get sense data for successful cmd */ + ATA_EH_SET_ACTIVE = (1 << 7), /* Set a device to active power mode */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK | - ATA_EH_GET_SUCCESS_SENSE, + ATA_EH_GET_SUCCESS_SENSE | ATA_EH_SET_ACTIVE, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | ATA_EH_ENABLE_LINK, @@ -357,7 +359,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ -- cgit v1.2.3 From 528ce6781726e022bc5dc84034360e6e8f1b89bd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 28 Sep 2023 20:43:24 +0800 Subject: io_uring: retain top 8bits of uring_cmd flags for kernel internal use Retain top 8bits of uring_cmd flags for kernel internal use, so that we can move IORING_URING_CMD_POLLED out of uapi header. 
Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Anuj Gupta Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 3 +++ include/uapi/linux/io_uring.h | 5 ++--- io_uring/io_uring.c | 3 +++ io_uring/uring_cmd.c | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 106cdc55ff3b..ae08d6f66e62 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -22,6 +22,9 @@ enum io_uring_cmd_flags { IO_URING_F_IOPOLL = (1 << 10), }; +/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ +#define IORING_URING_CMD_POLLED (1U << 31) + struct io_uring_cmd { struct file *file; const struct io_uring_sqe *sqe; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 683ac2b74721..425f64eee44e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -249,13 +249,12 @@ enum io_uring_op { }; /* - * sqe->uring_cmd_flags + * sqe->uring_cmd_flags top 8bits aren't available for userspace * IORING_URING_CMD_FIXED use registered buffer; pass this flag * along with setting sqe->buf_index. 
- * IORING_URING_CMD_POLLED driver use only */ #define IORING_URING_CMD_FIXED (1U << 0) -#define IORING_URING_CMD_POLLED (1U << 31) +#define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED /* diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2dff4772bf14..cb6bd9907045 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -4669,6 +4669,9 @@ static int __init io_uring_init(void) BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32)); + /* top 8bits are for internal use */ + BUILD_BUG_ON((IORING_URING_CMD_MASK & 0xff000000) != 0); + io_uring_optable_init(); /* diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 537795fddc87..a0b0ec5473bf 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -91,7 +91,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags); - if (ioucmd->flags & ~IORING_URING_CMD_FIXED) + if (ioucmd->flags & ~IORING_URING_CMD_MASK) return -EINVAL; if (ioucmd->flags & IORING_URING_CMD_FIXED) { -- cgit v1.2.3 From 93b8cc60c37b9d17732b7a297e5dca29b50a990d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 28 Sep 2023 20:43:25 +0800 Subject: io_uring: cancelable uring_cmd uring_cmd may never complete, such as ublk, in which uring cmd isn't completed until one new block request is coming from ublk block device. Add cancelable uring_cmd to provide mechanism to driver for cancelling pending commands in its own way. Add API of io_uring_cmd_mark_cancelable() for driver to mark one command as cancelable, then io_uring will cancel this command in io_uring_cancel_generic(). ->uring_cmd() callback is reused for canceling command in driver's way, then driver gets notified with the cancelling from io_uring. Add API of io_uring_cmd_get_task() to help driver cancel handler deal with the canceling. 
Reviewed-by: Gabriel Krisman Bertazi Suggested-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 15 ++++++++++++++ include/linux/io_uring_types.h | 6 ++++++ io_uring/io_uring.c | 33 +++++++++++++++++++++++++++++ io_uring/uring_cmd.c | 47 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index ae08d6f66e62..b4391e0a9bc8 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -20,9 +20,13 @@ enum io_uring_cmd_flags { IO_URING_F_SQE128 = (1 << 8), IO_URING_F_CQE32 = (1 << 9), IO_URING_F_IOPOLL = (1 << 10), + + /* set when uring wants to cancel a previously issued command */ + IO_URING_F_CANCEL = (1 << 11), }; /* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ +#define IORING_URING_CMD_CANCELABLE (1U << 30) #define IORING_URING_CMD_POLLED (1U << 31) struct io_uring_cmd { @@ -85,6 +89,9 @@ static inline void io_uring_free(struct task_struct *tsk) __io_uring_free(tsk); } int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); +void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags); +struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd); #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) @@ -125,6 +132,14 @@ static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd, { return -EOPNOTSUPP; } +static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ +} +static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd) +{ + return NULL; +} #endif #endif diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index fe1c5d4ec56c..e178461fa513 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -265,6 +265,12 @@ struct 
io_ring_ctx { */ struct io_wq_work_list iopoll_list; bool poll_multi_queue; + + /* + * Any cancelable uring_cmd is added to this list in + * ->uring_cmd() by io_uring_cmd_insert_cancelable() + */ + struct hlist_head cancelable_uring_cmd; } ____cacheline_aligned_in_smp; struct { diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cb6bd9907045..08c9ea46bb95 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -352,6 +352,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_HLIST_HEAD(&ctx->waitid_list); INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_WQ_LIST(&ctx->submit_state.compl_reqs); + INIT_HLIST_HEAD(&ctx->cancelable_uring_cmd); return ctx; err: kfree(ctx->cancel_table.hbs); @@ -3258,6 +3259,37 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) return ret; } +static bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx, + struct task_struct *task, bool cancel_all) +{ + struct hlist_node *tmp; + struct io_kiocb *req; + bool ret = false; + + lockdep_assert_held(&ctx->uring_lock); + + hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd, + hash_node) { + struct io_uring_cmd *cmd = io_kiocb_to_cmd(req, + struct io_uring_cmd); + struct file *file = req->file; + + if (!cancel_all && req->task != task) + continue; + + if (cmd->flags & IORING_URING_CMD_CANCELABLE) { + /* ->sqe isn't available if no async data */ + if (!req_has_async_data(req)) + cmd->sqe = NULL; + file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL); + ret = true; + } + } + io_submit_flush_completions(ctx); + + return ret; +} + static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, struct task_struct *task, bool cancel_all) @@ -3306,6 +3338,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, mutex_lock(&ctx->uring_lock); ret |= io_poll_remove_all(ctx, task, cancel_all); ret |= io_waitid_remove_all(ctx, task, cancel_all); + ret |= 
io_uring_try_cancel_uring_cmd(ctx, task, cancel_all); mutex_unlock(&ctx->uring_lock); ret |= io_kill_timeouts(ctx, task, cancel_all); if (task) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index a0b0ec5473bf..00a5e5621a28 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -13,6 +13,51 @@ #include "rsrc.h" #include "uring_cmd.h" +static void io_uring_cmd_del_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct io_kiocb *req = cmd_to_io_kiocb(cmd); + struct io_ring_ctx *ctx = req->ctx; + + if (!(cmd->flags & IORING_URING_CMD_CANCELABLE)) + return; + + cmd->flags &= ~IORING_URING_CMD_CANCELABLE; + io_ring_submit_lock(ctx, issue_flags); + hlist_del(&req->hash_node); + io_ring_submit_unlock(ctx, issue_flags); +} + +/* + * Mark this command as concelable, then io_uring_try_cancel_uring_cmd() + * will try to cancel this issued command by sending ->uring_cmd() with + * issue_flags of IO_URING_F_CANCEL. + * + * The command is guaranteed to not be done when calling ->uring_cmd() + * with IO_URING_F_CANCEL, but it is driver's responsibility to deal + * with race between io_uring canceling and normal completion. 
+ */ +void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct io_kiocb *req = cmd_to_io_kiocb(cmd); + struct io_ring_ctx *ctx = req->ctx; + + if (!(cmd->flags & IORING_URING_CMD_CANCELABLE)) { + cmd->flags |= IORING_URING_CMD_CANCELABLE; + io_ring_submit_lock(ctx, issue_flags); + hlist_add_head(&req->hash_node, &ctx->cancelable_uring_cmd); + io_ring_submit_unlock(ctx, issue_flags); + } +} +EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable); + +struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd) +{ + return cmd_to_io_kiocb(cmd)->task; +} +EXPORT_SYMBOL_GPL(io_uring_cmd_get_task); + static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); @@ -56,6 +101,8 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2, { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); + io_uring_cmd_del_cancelable(ioucmd, issue_flags); + if (ret < 0) req_set_fail(req); -- cgit v1.2.3 From a941b784b15ff65e1a3b6a259c6d6cf7fa0bb3c3 Mon Sep 17 00:00:00 2001 From: Nipun Gupta Date: Fri, 15 Sep 2023 10:24:21 +0530 Subject: cdx: add support for bus mastering Introduce cdx_set_master() and cdx_clear_master() APIs to support enable and disable of bus mastering. Drivers need to use these APIs to enable/disable DMAs from the CDX devices. 
Signed-off-by: Nipun Gupta Reviewed-by: Pieter Jansen van Vuuren Link: https://lore.kernel.org/r/20230915045423.31630-1-nipun.gupta@amd.com Signed-off-by: Alex Williamson --- drivers/cdx/cdx.c | 32 ++++++++++++++++++ drivers/cdx/controller/cdx_controller.c | 4 +++ drivers/cdx/controller/mcdi_functions.c | 58 +++++++++++++++++++++++++++++++++ drivers/cdx/controller/mcdi_functions.h | 13 ++++++++ include/linux/cdx/cdx_bus.h | 18 ++++++++++ 5 files changed, 125 insertions(+) (limited to 'include/linux') diff --git a/drivers/cdx/cdx.c b/drivers/cdx/cdx.c index d2cad4c670a0..9efb7584f952 100644 --- a/drivers/cdx/cdx.c +++ b/drivers/cdx/cdx.c @@ -182,6 +182,38 @@ cdx_match_id(const struct cdx_device_id *ids, struct cdx_device *dev) return NULL; } +int cdx_set_master(struct cdx_device *cdx_dev) +{ + struct cdx_controller *cdx = cdx_dev->cdx; + struct cdx_device_config dev_config; + int ret = -EOPNOTSUPP; + + dev_config.type = CDX_DEV_BUS_MASTER_CONF; + dev_config.bus_master_enable = true; + if (cdx->ops->dev_configure) + ret = cdx->ops->dev_configure(cdx, cdx_dev->bus_num, + cdx_dev->dev_num, &dev_config); + + return ret; +} +EXPORT_SYMBOL_GPL(cdx_set_master); + +int cdx_clear_master(struct cdx_device *cdx_dev) +{ + struct cdx_controller *cdx = cdx_dev->cdx; + struct cdx_device_config dev_config; + int ret = -EOPNOTSUPP; + + dev_config.type = CDX_DEV_BUS_MASTER_CONF; + dev_config.bus_master_enable = false; + if (cdx->ops->dev_configure) + ret = cdx->ops->dev_configure(cdx, cdx_dev->bus_num, + cdx_dev->dev_num, &dev_config); + + return ret; +} +EXPORT_SYMBOL_GPL(cdx_clear_master); + /** * cdx_bus_match - device to driver matching callback * @dev: the cdx device to match against diff --git a/drivers/cdx/controller/cdx_controller.c b/drivers/cdx/controller/cdx_controller.c index bb4ae7970e21..7828dac8edb1 100644 --- a/drivers/cdx/controller/cdx_controller.c +++ b/drivers/cdx/controller/cdx_controller.c @@ -56,6 +56,10 @@ static int cdx_configure_device(struct cdx_controller 
*cdx, case CDX_DEV_RESET_CONF: ret = cdx_mcdi_reset_device(cdx->priv, bus_num, dev_num); break; + case CDX_DEV_BUS_MASTER_CONF: + ret = cdx_mcdi_bus_master_enable(cdx->priv, bus_num, dev_num, + dev_config->bus_master_enable); + break; default: ret = -EINVAL; } diff --git a/drivers/cdx/controller/mcdi_functions.c b/drivers/cdx/controller/mcdi_functions.c index 0158f26533dd..fc82435d5dea 100644 --- a/drivers/cdx/controller/mcdi_functions.c +++ b/drivers/cdx/controller/mcdi_functions.c @@ -137,3 +137,61 @@ int cdx_mcdi_reset_device(struct cdx_mcdi *cdx, u8 bus_num, u8 dev_num) return ret; } + +static int cdx_mcdi_ctrl_flag_get(struct cdx_mcdi *cdx, u8 bus_num, + u8 dev_num, u32 *flags) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_CDX_DEVICE_CONTROL_GET_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_CDX_DEVICE_CONTROL_GET_OUT_LEN); + size_t outlen; + int ret; + + MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_GET_IN_BUS, bus_num); + MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_GET_IN_DEVICE, dev_num); + ret = cdx_mcdi_rpc(cdx, MC_CMD_CDX_DEVICE_CONTROL_GET, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + if (ret) + return ret; + + if (outlen != MC_CMD_CDX_DEVICE_CONTROL_GET_OUT_LEN) + return -EIO; + + *flags = MCDI_DWORD(outbuf, CDX_DEVICE_CONTROL_GET_OUT_FLAGS); + + return 0; +} + +static int cdx_mcdi_ctrl_flag_set(struct cdx_mcdi *cdx, u8 bus_num, + u8 dev_num, bool enable, int bit_pos) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_CDX_DEVICE_CONTROL_SET_IN_LEN); + u32 flags; + int ret; + + /* + * Get flags and then set/reset bit at bit_pos according to + * the input params. 
+ */ + ret = cdx_mcdi_ctrl_flag_get(cdx, bus_num, dev_num, &flags); + if (ret) + return ret; + + flags = flags & (u32)(~(BIT(bit_pos))); + if (enable) + flags |= (1 << bit_pos); + + MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_SET_IN_BUS, bus_num); + MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_SET_IN_DEVICE, dev_num); + MCDI_SET_DWORD(inbuf, CDX_DEVICE_CONTROL_SET_IN_FLAGS, flags); + ret = cdx_mcdi_rpc(cdx, MC_CMD_CDX_DEVICE_CONTROL_SET, inbuf, + sizeof(inbuf), NULL, 0, NULL); + + return ret; +} + +int cdx_mcdi_bus_master_enable(struct cdx_mcdi *cdx, u8 bus_num, + u8 dev_num, bool enable) +{ + return cdx_mcdi_ctrl_flag_set(cdx, bus_num, dev_num, enable, + MC_CMD_CDX_DEVICE_CONTROL_SET_IN_BUS_MASTER_ENABLE_LBN); +} diff --git a/drivers/cdx/controller/mcdi_functions.h b/drivers/cdx/controller/mcdi_functions.h index 7440ace5539a..a448d6581eb4 100644 --- a/drivers/cdx/controller/mcdi_functions.h +++ b/drivers/cdx/controller/mcdi_functions.h @@ -58,4 +58,17 @@ int cdx_mcdi_get_dev_config(struct cdx_mcdi *cdx, int cdx_mcdi_reset_device(struct cdx_mcdi *cdx, u8 bus_num, u8 dev_num); +/** + * cdx_mcdi_bus_master_enable - Set/Reset bus mastering for cdx device + * represented by bus_num:dev_num + * @cdx: pointer to MCDI interface. + * @bus_num: Bus number. + * @dev_num: Device number. + * @enable: Enable bus mastering if set, disable otherwise. 
+ * + * Return: 0 on success, <0 on failure + */ +int cdx_mcdi_bus_master_enable(struct cdx_mcdi *cdx, u8 bus_num, + u8 dev_num, bool enable); + #endif /* CDX_MCDI_FUNCTIONS_H */ diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h index bead71b7bc73..8320ec3b9e37 100644 --- a/include/linux/cdx/cdx_bus.h +++ b/include/linux/cdx/cdx_bus.h @@ -21,11 +21,13 @@ struct cdx_controller; enum { + CDX_DEV_BUS_MASTER_CONF, CDX_DEV_RESET_CONF, }; struct cdx_device_config { u8 type; + bool bus_master_enable; }; typedef int (*cdx_scan_cb)(struct cdx_controller *cdx); @@ -170,4 +172,20 @@ extern struct bus_type cdx_bus_type; */ int cdx_dev_reset(struct device *dev); +/** + * cdx_set_master - enables bus-mastering for CDX device + * @cdx_dev: the CDX device to enable + * + * Return: 0 for success, -errno on failure + */ +int cdx_set_master(struct cdx_device *cdx_dev); + +/** + * cdx_clear_master - disables bus-mastering for CDX device + * @cdx_dev: the CDX device to disable + * + * Return: 0 for success, -errno on failure + */ +int cdx_clear_master(struct cdx_device *cdx_dev); + #endif /* _CDX_BUS_H_ */ -- cgit v1.2.3 From d427da2323b093a65d8317783e76ab8fad2e2ef0 Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Fri, 25 Aug 2023 14:27:10 +0800 Subject: PCI: Add pci_get_base_class() helper There is no function to get all PCI devices in a system by matching against the base class code only, ignoring the sub-class code and the programming interface. Add pci_get_base_class() to suit the need. 
For example, if a driver wants to process all PCI display devices in a system, it can do so like this: pdev = NULL; while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) { do_something_for_pci_display_device(pdev); } Link: https://lore.kernel.org/r/20230825062714.6325-2-sui.jingfeng@linux.dev Signed-off-by: Sui Jingfeng [bhelgaas: reword commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Deucher --- drivers/pci/search.c | 31 +++++++++++++++++++++++++++++++ include/linux/pci.h | 5 +++++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/search.c b/drivers/pci/search.c index b4c138a6ec02..53840634fbfc 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -363,6 +363,37 @@ struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from) } EXPORT_SYMBOL(pci_get_class); +/** + * pci_get_base_class - searching for a PCI device by matching against the base class code only + * @class: search for a PCI device with this base class code + * @from: Previous PCI device found in search, or %NULL for new search. + * + * Iterates through the list of known PCI devices. If a PCI device is found + * with a matching base class code, the reference count to the device is + * incremented. See pci_match_one_device() to figure out how does this works. + * A new search is initiated by passing %NULL as the @from argument. + * Otherwise if @from is not %NULL, searches continue from next device on the + * global list. The reference count for @from is always decremented if it is + * not %NULL. + * + * Returns: + * A pointer to a matched PCI device, %NULL Otherwise. 
+ */ +struct pci_dev *pci_get_base_class(unsigned int class, struct pci_dev *from) +{ + struct pci_device_id id = { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class_mask = 0xFF0000, + .class = class << 16, + }; + + return pci_get_dev_by_id(&id, from); +} +EXPORT_SYMBOL(pci_get_base_class); + /** * pci_dev_present - Returns 1 if device matching the device list is present, 0 if not. * @ids: A pointer to a null terminated list of struct pci_device_id structures diff --git a/include/linux/pci.h b/include/linux/pci.h index 8c7c2c3c6c65..40ac1288a2cc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1181,6 +1181,8 @@ struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, unsigned int devfn); struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from); +struct pci_dev *pci_get_base_class(unsigned int class, struct pci_dev *from); + int pci_dev_present(const struct pci_device_id *ids); int pci_bus_read_config_byte(struct pci_bus *bus, unsigned int devfn, @@ -1924,6 +1926,9 @@ static inline struct pci_dev *pci_get_class(unsigned int class, struct pci_dev *from) { return NULL; } +static inline struct pci_dev *pci_get_base_class(unsigned int class, + struct pci_dev *from) +{ return NULL; } static inline int pci_dev_present(const struct pci_device_id *ids) { return 0; } -- cgit v1.2.3 From 194bb58c6090e39bd7d9b9c888a079213628e1f6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 8 Jun 2023 11:57:40 -0600 Subject: io_uring: add support for futex wake and wait Add support for FUTEX_WAKE/WAIT primitives. IORING_OP_FUTEX_WAKE is a mix of FUTEX_WAKE and FUTEX_WAKE_BITSET, as it does support passing in a bitset. Similarly, IORING_OP_FUTEX_WAIT is a mix of FUTEX_WAIT and FUTEX_WAIT_BITSET. For both of them, they are using the futex2 interface.
FUTEX_WAKE is straightforward, as those can always be done directly from the io_uring submission without needing async handling. For FUTEX_WAIT, things are a bit more complicated. If the futex isn't ready, then we rely on a callback via futex_queue->wake() when someone wakes up the futex. From that callback, we queue up task_work with the original task, which will post a CQE and wake it, if necessary. Cancelations are supported, both from the application point-of-view, but also to be able to cancel pending waits if the ring exits before all events have occurred. The return value of futex_unqueue() is used to gate who wins the potential race between cancelation and futex wakeups. Whoever gets a 'ret == 1' return from that claims ownership of the io_uring futex request. This is just the barebones wait/wake support. PI or REQUEUE support is not added at this point, unclear if we might look into that later. Likewise, explicit timeouts are not supported either. It is expected that users that need timeouts would do so via the usual io_uring mechanism to do that using linked timeouts. The SQE format is as follows: `addr` Address of futex `fd` futex2(2) FUTEX2_* flags `futex_flags` io_uring specific command flags. None valid now.
`addr2` Value of futex `addr3` Mask to wake/wait Acked-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 5 + include/uapi/linux/io_uring.h | 3 + io_uring/Makefile | 1 + io_uring/cancel.c | 5 + io_uring/cancel.h | 4 + io_uring/futex.c | 235 +++++++++++++++++++++++++++++++++++++++++ io_uring/futex.h | 34 ++++++ io_uring/io_uring.c | 7 ++ io_uring/opdef.c | 23 ++++ 9 files changed, 317 insertions(+) create mode 100644 io_uring/futex.c create mode 100644 io_uring/futex.h (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e178461fa513..990984614fca 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -321,6 +321,11 @@ struct io_ring_ctx { struct hlist_head waitid_list; +#ifdef CONFIG_FUTEX + struct hlist_head futex_list; + struct io_alloc_cache futex_cache; +#endif + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ struct io_sq_data *sq_data; /* if using sq thread polling */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 425f64eee44e..04f9fba38d4b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -66,6 +66,7 @@ struct io_uring_sqe { __u32 msg_ring_flags; __u32 uring_cmd_flags; __u32 waitid_flags; + __u32 futex_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -243,6 +244,8 @@ enum io_uring_op { IORING_OP_SENDMSG_ZC, IORING_OP_READ_MULTISHOT, IORING_OP_WAITID, + IORING_OP_FUTEX_WAIT, + IORING_OP_FUTEX_WAKE, /* this goes last, obviously */ IORING_OP_LAST, diff --git a/io_uring/Makefile b/io_uring/Makefile index 7bd64e442567..e5be47e4fc3b 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \ cancel.o kbuf.o rsrc.o rw.o opdef.o \ notif.o waitid.o obj-$(CONFIG_IO_WQ) += io-wq.o +obj-$(CONFIG_FUTEX) += 
futex.o diff --git a/io_uring/cancel.c b/io_uring/cancel.c index eb77a51c5a79..3c19cccb1aec 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -16,6 +16,7 @@ #include "poll.h" #include "timeout.h" #include "waitid.h" +#include "futex.h" #include "cancel.h" struct io_cancel { @@ -124,6 +125,10 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd, if (ret != -ENOENT) return ret; + ret = io_futex_cancel(ctx, cd, issue_flags); + if (ret != -ENOENT) + return ret; + spin_lock(&ctx->completion_lock); if (!(cd->flags & IORING_ASYNC_CANCEL_FD)) ret = io_timeout_cancel(ctx, cd); diff --git a/io_uring/cancel.h b/io_uring/cancel.h index fc98622e6166..c0a8e7c520b6 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef IORING_CANCEL_H +#define IORING_CANCEL_H #include @@ -22,3 +24,5 @@ void init_hash_table(struct io_hash_table *table, unsigned size); int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg); bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd); + +#endif diff --git a/io_uring/futex.c b/io_uring/futex.c new file mode 100644 index 000000000000..eb4406ac46fb --- /dev/null +++ b/io_uring/futex.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#include + +#include "../kernel/futex/futex.h" +#include "io_uring.h" +#include "rsrc.h" +#include "futex.h" + +struct io_futex { + struct file *file; + u32 __user *uaddr; + unsigned long futex_val; + unsigned long futex_mask; + u32 futex_flags; +}; + +struct io_futex_data { + union { + struct futex_q q; + struct io_cache_entry cache; + }; + struct io_kiocb *req; +}; + +void io_futex_cache_init(struct io_ring_ctx *ctx) +{ + io_alloc_cache_init(&ctx->futex_cache, IO_NODE_ALLOC_CACHE_MAX, + sizeof(struct io_futex_data)); +} + +static void io_futex_cache_entry_free(struct io_cache_entry *entry) +{ + kfree(container_of(entry, struct io_futex_data, cache)); +} 
+ +void io_futex_cache_free(struct io_ring_ctx *ctx) +{ + io_alloc_cache_free(&ctx->futex_cache, io_futex_cache_entry_free); +} + +static void io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts) +{ + struct io_futex_data *ifd = req->async_data; + struct io_ring_ctx *ctx = req->ctx; + + io_tw_lock(ctx, ts); + if (!io_alloc_cache_put(&ctx->futex_cache, &ifd->cache)) + kfree(ifd); + req->async_data = NULL; + hlist_del_init(&req->hash_node); + io_req_task_complete(req, ts); +} + +static bool __io_futex_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) +{ + struct io_futex_data *ifd = req->async_data; + + /* futex wake already done or in progress */ + if (!futex_unqueue(&ifd->q)) + return false; + + hlist_del_init(&req->hash_node); + io_req_set_res(req, -ECANCELED, 0); + req->io_task_work.func = io_futex_complete; + io_req_task_work_add(req); + return true; +} + +int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, + unsigned int issue_flags) +{ + struct hlist_node *tmp; + struct io_kiocb *req; + int nr = 0; + + if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED)) + return -ENOENT; + + io_ring_submit_lock(ctx, issue_flags); + hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) { + if (req->cqe.user_data != cd->data && + !(cd->flags & IORING_ASYNC_CANCEL_ANY)) + continue; + if (__io_futex_cancel(ctx, req)) + nr++; + if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) + break; + } + io_ring_submit_unlock(ctx, issue_flags); + + if (nr) + return nr; + + return -ENOENT; +} + +bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task, + bool cancel_all) +{ + struct hlist_node *tmp; + struct io_kiocb *req; + bool found = false; + + lockdep_assert_held(&ctx->uring_lock); + + hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) { + if (!io_match_task_safe(req, task, cancel_all)) + continue; + __io_futex_cancel(ctx, req); + found = true; + } + + return found; +} + +int io_futex_prep(struct 
io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); + u32 flags; + + if (unlikely(sqe->len || sqe->futex_flags || sqe->buf_index || + sqe->file_index)) + return -EINVAL; + + iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr)); + iof->futex_val = READ_ONCE(sqe->addr2); + iof->futex_mask = READ_ONCE(sqe->addr3); + flags = READ_ONCE(sqe->fd); + + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + iof->futex_flags = futex2_to_flags(flags); + if (!futex_flags_valid(iof->futex_flags)) + return -EINVAL; + + if (!futex_validate_input(iof->futex_flags, iof->futex_val) || + !futex_validate_input(iof->futex_flags, iof->futex_mask)) + return -EINVAL; + + return 0; +} + +static void io_futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q) +{ + struct io_futex_data *ifd = container_of(q, struct io_futex_data, q); + struct io_kiocb *req = ifd->req; + + if (unlikely(!__futex_wake_mark(q))) + return; + + io_req_set_res(req, 0, 0); + req->io_task_work.func = io_futex_complete; + io_req_task_work_add(req); +} + +static struct io_futex_data *io_alloc_ifd(struct io_ring_ctx *ctx) +{ + struct io_cache_entry *entry; + + entry = io_alloc_cache_get(&ctx->futex_cache); + if (entry) + return container_of(entry, struct io_futex_data, cache); + + return kmalloc(sizeof(struct io_futex_data), GFP_NOWAIT); +} + +int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); + struct io_ring_ctx *ctx = req->ctx; + struct io_futex_data *ifd = NULL; + struct futex_hash_bucket *hb; + int ret; + + if (!iof->futex_mask) { + ret = -EINVAL; + goto done; + } + + io_ring_submit_lock(ctx, issue_flags); + ifd = io_alloc_ifd(ctx); + if (!ifd) { + ret = -ENOMEM; + goto done_unlock; + } + + req->async_data = ifd; + ifd->q = futex_q_init; + ifd->q.bitset = iof->futex_mask; + ifd->q.wake = io_futex_wake_fn; + ifd->req = req; + + ret = futex_wait_setup(iof->uaddr, 
iof->futex_val, iof->futex_flags, + &ifd->q, &hb); + if (!ret) { + hlist_add_head(&req->hash_node, &ctx->futex_list); + io_ring_submit_unlock(ctx, issue_flags); + + futex_queue(&ifd->q, hb); + return IOU_ISSUE_SKIP_COMPLETE; + } + +done_unlock: + io_ring_submit_unlock(ctx, issue_flags); +done: + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); + kfree(ifd); + return IOU_OK; +} + +int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex); + int ret; + + /* + * Strict flags - ensure that waking 0 futexes yields a 0 result. + * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details. + */ + ret = futex_wake(iof->uaddr, FLAGS_STRICT | iof->futex_flags, + iof->futex_val, iof->futex_mask); + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); + return IOU_OK; +} diff --git a/io_uring/futex.h b/io_uring/futex.h new file mode 100644 index 000000000000..ddc9e0d73c52 --- /dev/null +++ b/io_uring/futex.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "cancel.h" + +int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags); +int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags); + +#if defined(CONFIG_FUTEX) +int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, + unsigned int issue_flags); +bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task, + bool cancel_all); +void io_futex_cache_init(struct io_ring_ctx *ctx); +void io_futex_cache_free(struct io_ring_ctx *ctx); +#else +static inline int io_futex_cancel(struct io_ring_ctx *ctx, + struct io_cancel_data *cd, + unsigned int issue_flags) +{ + return 0; +} +static inline bool io_futex_remove_all(struct io_ring_ctx *ctx, + struct task_struct *task, bool cancel_all) +{ + return false; +} +static inline void io_futex_cache_init(struct io_ring_ctx *ctx) +{ +} +static inline void 
io_futex_cache_free(struct io_ring_ctx *ctx) +{ +} +#endif diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 08c9ea46bb95..3c1c111d02cb 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -93,6 +93,7 @@ #include "net.h" #include "notif.h" #include "waitid.h" +#include "futex.h" #include "timeout.h" #include "poll.h" @@ -330,6 +331,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) sizeof(struct async_poll)); io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, sizeof(struct io_async_msghdr)); + io_futex_cache_init(ctx); init_completion(&ctx->ref_comp); xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); mutex_init(&ctx->uring_lock); @@ -350,6 +352,9 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ctx->submit_state.free_list.next = NULL; INIT_WQ_LIST(&ctx->locked_free_list); INIT_HLIST_HEAD(&ctx->waitid_list); +#ifdef CONFIG_FUTEX + INIT_HLIST_HEAD(&ctx->futex_list); +#endif INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_WQ_LIST(&ctx->submit_state.compl_reqs); INIT_HLIST_HEAD(&ctx->cancelable_uring_cmd); @@ -2895,6 +2900,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_eventfd_unregister(ctx); io_alloc_cache_free(&ctx->apoll_cache, io_apoll_cache_free); io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free); + io_futex_cache_free(ctx); io_destroy_buffers(ctx); mutex_unlock(&ctx->uring_lock); if (ctx->sq_creds) @@ -3338,6 +3344,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, mutex_lock(&ctx->uring_lock); ret |= io_poll_remove_all(ctx, task, cancel_all); ret |= io_waitid_remove_all(ctx, task, cancel_all); + ret |= io_futex_remove_all(ctx, task, cancel_all); ret |= io_uring_try_cancel_uring_cmd(ctx, task, cancel_all); mutex_unlock(&ctx->uring_lock); ret |= io_kill_timeouts(ctx, task, cancel_all); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index aadcbf7136b0..31a3a421e94d 100644 --- 
a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -34,6 +34,7 @@ #include "cancel.h" #include "rw.h" #include "waitid.h" +#include "futex.h" static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags) { @@ -444,6 +445,22 @@ const struct io_issue_def io_issue_defs[] = { .prep = io_waitid_prep, .issue = io_waitid, }, + [IORING_OP_FUTEX_WAIT] = { +#if defined(CONFIG_FUTEX) + .prep = io_futex_prep, + .issue = io_futex_wait, +#else + .prep = io_eopnotsupp_prep, +#endif + }, + [IORING_OP_FUTEX_WAKE] = { +#if defined(CONFIG_FUTEX) + .prep = io_futex_prep, + .issue = io_futex_wake, +#else + .prep = io_eopnotsupp_prep, +#endif + }, }; const struct io_cold_def io_cold_defs[] = { @@ -670,6 +687,12 @@ const struct io_cold_def io_cold_defs[] = { .name = "WAITID", .async_size = sizeof(struct io_waitid_async), }, + [IORING_OP_FUTEX_WAIT] = { + .name = "FUTEX_WAIT", + }, + [IORING_OP_FUTEX_WAKE] = { + .name = "FUTEX_WAKE", + }, }; const char *io_uring_get_opcode(u8 opcode) -- cgit v1.2.3 From d77008421afda6208b1256c9b218457acd174ca6 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Thu, 17 Aug 2023 21:14:57 -0700 Subject: groups: Convert group_info.usage to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable group_info.usage is used as pure reference counter. Convert it to refcount_t and fix up the operations. 
**Important note for maintainers: Some functions from refcount_t API defined in refcount.h have different memory ordering guarantees than their atomic counterparts. Please check Documentation/core-api/refcount-vs-atomic.rst for more information. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the group_info.usage it might make a difference in following places: - put_group_info(): decrement in refcount_dec_and_test() only provides RELEASE ordering and ACQUIRE ordering on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Signed-off-by: Elena Reshetova Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Link: https://lore.kernel.org/r/20230818041456.gonna.009-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/cred.h | 7 ++++--- kernel/cred.c | 2 +- kernel/groups.c | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index f923528d5cc4..92f8d772da6f 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -23,7 +24,7 @@ struct inode; * COW Supplementary groups list */ struct group_info { - atomic_t usage; + refcount_t usage; int ngroups; kgid_t gid[]; } __randomize_layout; @@ -39,7 +40,7 @@ struct group_info { */ static inline struct group_info *get_group_info(struct group_info *gi) { - atomic_inc(&gi->usage); + refcount_inc(&gi->usage); return gi; } @@ -49,7 +50,7 @@ static inline struct group_info *get_group_info(struct group_info *gi) */ #define put_group_info(group_info) \ do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ + if (refcount_dec_and_test(&(group_info)->usage)) \ groups_free(group_info); \ } while (0) diff --git 
a/kernel/cred.c b/kernel/cred.c index 98cb4eca23fb..4dc0b27b5462 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -36,7 +36,7 @@ do { \ static struct kmem_cache *cred_jar; /* init to 2 - one for init_task, one to ensure it is never freed */ -static struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; +static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; /* * The initial credentials for the initial task diff --git a/kernel/groups.c b/kernel/groups.c index 9aaed2a31073..9b43da22647d 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -19,7 +19,7 @@ struct group_info *groups_alloc(int gidsetsize) if (!gi) return NULL; - atomic_set(&gi->usage, 1); + refcount_set(&gi->usage, 1); gi->ngroups = gidsetsize; return gi; } -- cgit v1.2.3 From ce60f27bb62dfeb1bf827350520f34abc84e0933 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 20 Sep 2023 05:09:58 +0100 Subject: mm: abstract moving to the next PFN In order to fix the L1TF vulnerability, x86 can invert the PTE bits for PROT_NONE VMAs, which means we cannot move from one PTE to the next by adding 1 to the PFN field of the PTE. This results in the BUG reported at [1]. Abstract advancing the PTE to the next PFN through a pte_next_pfn() function/macro. 
Link: https://lkml.kernel.org/r/20230920040958.866520-1-willy@infradead.org Fixes: bcc6cc832573 ("mm: add default definition of set_ptes()") Signed-off-by: Matthew Wilcox (Oracle) Reported-by: syzbot+55cc72f8cc3a549119df@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/000000000000d099fa0604f03351@google.com [1] Reviewed-by: Yin Fengwei Cc: Dave Hansen Cc: David Hildenbrand Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/include/asm/pgtable.h | 8 ++++++++ include/linux/pgtable.h | 10 +++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d6ad98ca1288..e02b179ec659 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -955,6 +955,14 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte == b.pte; } +static inline pte_t pte_next_pfn(pte_t pte) +{ + if (__pte_needs_invert(pte_val(pte))) + return __pte(pte_val(pte) - (1UL << PFN_PTE_SHIFT)); + return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); +} +#define pte_next_pfn pte_next_pfn + static inline int pte_present(pte_t a) { return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 1fba072b3dac..af7639c3b0a3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -206,6 +206,14 @@ static inline int pmd_young(pmd_t pmd) #endif #ifndef set_ptes + +#ifndef pte_next_pfn +static inline pte_t pte_next_pfn(pte_t pte) +{ + return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); +} +#endif + /** * set_ptes - Map consecutive pages to a contiguous range of addresses. * @mm: Address space to map the pages into. 
@@ -231,7 +239,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, if (--nr == 0) break; ptep++; - pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); + pte = pte_next_pfn(pte); } arch_leave_lazy_mmu_mode(); } -- cgit v1.2.3 From 5c590804b6b0ff933ed4e5cee5d76de3a5048d9f Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 21 Sep 2023 14:12:35 -0400 Subject: maple_tree: add mas_is_active() to detect in-tree walks Patch series "maple_tree: Fix mas_prev() state regression". Pedro Falcato reported an mprotect regression [1] which was bisected back to the iterator changes for maple tree. Root cause analysis showed the mas_prev() running off the end of the VMA space (previous from 0) followed by mas_find(), would skip the first value. This patchset introduces maple state underflow/overflow so the sequence of calls on the maple state will return what the user expects. Users who encounter this bug may see mprotect(), userfaultfd_register(), and mlock() fail on VMAs mapped with address 0. This patch (of 2): Instead of constantly checking each possibility of the maple state, create a fast path that will skip over checking unlikely states. Link: https://lkml.kernel.org/r/20230921181236.509072-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230921181236.509072-2-Liam.Howlett@oracle.com Signed-off-by: Liam R.
Howlett Cc: Pedro Falcato Cc: Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index e41c70ac7744..f66f5f78f8cf 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -511,6 +511,15 @@ static inline bool mas_is_paused(const struct ma_state *mas) return mas->node == MAS_PAUSE; } +/* Check if the mas is pointing to a node or not */ +static inline bool mas_is_active(struct ma_state *mas) +{ + if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE) + return true; + + return false; +} + /** * mas_reset() - Reset a Maple Tree operation state. * @mas: Maple Tree operation state. -- cgit v1.2.3 From a8091f039c1ebf5cb0d5261e3613f18eb2a5d8b7 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 21 Sep 2023 14:12:36 -0400 Subject: maple_tree: add MAS_UNDERFLOW and MAS_OVERFLOW states When updating the maple tree iterator to avoid rewalks, an issue was introduced when shifting beyond the limits. This can be seen by trying to go to the previous address of 0, which would set the maple node to MAS_NONE and keep the range as the last entry. Subsequent calls to mas_find() would then search upwards from mas->last and skip the value at mas->index/mas->last. This showed up as a bug in mprotect which skips the actual VMA at the current range after attempting to go to the previous VMA from 0. Since MAS_NONE may already be set when searching for a value that isn't contained within a node, changing the handling of MAS_NONE in mas_find() would make the code more complicated and error prone. Furthermore, there was no way to tell which limit was hit, and thus which action to take (next or the entry at the current range). This solution is to add two states to track what happened with the previous iterator action. 
This allows for the expected behaviour of the next command to return the correct item (either the item at the range requested, or the next/previous). Tests are also added and updated accordingly. Link: https://lkml.kernel.org/r/20230921181236.509072-3-Liam.Howlett@oracle.com Link: https://gist.github.com/heatd/85d2971fae1501b55b6ea401fbbe485b Link: https://lore.kernel.org/linux-mm/20230921181236.509072-1-Liam.Howlett@oracle.com/ Fixes: 39193685d585 ("maple_tree: try harder to keep active node with mas_prev()") Signed-off-by: Liam R. Howlett Reported-by: Pedro Falcato Closes: https://gist.github.com/heatd/85d2971fae1501b55b6ea401fbbe485b Closes: https://bugs.archlinux.org/task/79656 Cc: Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 + lib/maple_tree.c | 221 +++++++++++++++++++++++++++++++++------------ lib/test_maple_tree.c | 87 +++++++++++++++--- 3 files changed, 237 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index f66f5f78f8cf..d01e850b570f 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -428,6 +428,8 @@ struct ma_wr_state { #define MAS_ROOT ((struct maple_enode *)5UL) #define MAS_NONE ((struct maple_enode *)9UL) #define MAS_PAUSE ((struct maple_enode *)17UL) +#define MAS_OVERFLOW ((struct maple_enode *)33UL) +#define MAS_UNDERFLOW ((struct maple_enode *)65UL) #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index ee1ff0c59fd7..0e00a84e8e8f 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -256,6 +256,22 @@ bool mas_is_err(struct ma_state *mas) return xa_is_err(mas->node); } +static __always_inline bool mas_is_overflow(struct ma_state *mas) +{ + if (unlikely(mas->node == MAS_OVERFLOW)) + return true; + + return false; +} + +static __always_inline bool mas_is_underflow(struct ma_state *mas) +{ + if (unlikely(mas->node == MAS_UNDERFLOW)) + return 
true; + + return false; +} + static inline bool mas_searchable(struct ma_state *mas) { if (mas_is_none(mas)) @@ -4415,10 +4431,13 @@ no_entry: * * @mas: The maple state * @max: The minimum starting range + * @empty: Can be empty + * @set_underflow: Set the @mas->node to underflow state on limit. * * Return: The entry in the previous slot which is possibly NULL */ -static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) +static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty, + bool set_underflow) { void *entry; void __rcu **slots; @@ -4435,7 +4454,6 @@ retry: if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; -again: if (mas->min <= min) { pivot = mas_safe_min(mas, pivots, mas->offset); @@ -4443,9 +4461,10 @@ again: goto retry; if (pivot <= min) - return NULL; + goto underflow; } +again: if (likely(mas->offset)) { mas->offset--; mas->last = mas->index - 1; @@ -4457,7 +4476,7 @@ again: } if (mas_is_none(mas)) - return NULL; + goto underflow; mas->last = mas->max; node = mas_mn(mas); @@ -4474,10 +4493,19 @@ again: if (likely(entry)) return entry; - if (!empty) + if (!empty) { + if (mas->index <= min) + goto underflow; + goto again; + } return entry; + +underflow: + if (set_underflow) + mas->node = MAS_UNDERFLOW; + return NULL; } /* @@ -4567,10 +4595,13 @@ no_entry: * @mas: The maple state * @max: The maximum starting range * @empty: Can be empty + * @set_overflow: Should @mas->node be set to overflow when the limit is + * reached. 
* * Return: The entry in the next slot which is possibly NULL */ -static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) +static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty, + bool set_overflow) { void __rcu **slots; unsigned long *pivots; @@ -4589,22 +4620,22 @@ retry: if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; -again: if (mas->max >= max) { if (likely(mas->offset < data_end)) pivot = pivots[mas->offset]; else - return NULL; /* must be mas->max */ + goto overflow; if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; if (pivot >= max) - return NULL; + goto overflow; } if (likely(mas->offset < data_end)) { mas->index = pivots[mas->offset] + 1; +again: mas->offset++; if (likely(mas->offset < data_end)) mas->last = pivots[mas->offset]; @@ -4616,8 +4647,11 @@ again: goto retry; } - if (mas_is_none(mas)) + if (WARN_ON_ONCE(mas_is_none(mas))) { + mas->node = MAS_OVERFLOW; return NULL; + goto overflow; + } mas->offset = 0; mas->index = mas->min; @@ -4636,12 +4670,20 @@ again: return entry; if (!empty) { - if (!mas->offset) - data_end = 2; + if (mas->last >= max) + goto overflow; + + mas->index = mas->last + 1; + /* Node cannot end on NULL, so it's safe to short-cut here */ goto again; } return entry; + +overflow: + if (set_overflow) + mas->node = MAS_OVERFLOW; + return NULL; } /* @@ -4651,17 +4693,20 @@ again: * * Set the @mas->node to the next entry and the range_start to * the beginning value for the entry. Does not check beyond @limit. - * Sets @mas->index and @mas->last to the limit if it is hit. + * Sets @mas->index and @mas->last to the range, Does not update @mas->index and + * @mas->last on overflow. * Restarts on dead nodes. * * Return: the next entry or %NULL. 
*/ static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) { - if (mas->last >= limit) + if (mas->last >= limit) { + mas->node = MAS_OVERFLOW; return NULL; + } - return mas_next_slot(mas, limit, false); + return mas_next_slot(mas, limit, false, true); } /* @@ -4837,7 +4882,7 @@ void *mas_walk(struct ma_state *mas) { void *entry; - if (mas_is_none(mas) || mas_is_paused(mas) || mas_is_ptr(mas)) + if (!mas_is_active(mas) || !mas_is_start(mas)) mas->node = MAS_START; retry: entry = mas_state_walk(mas); @@ -5294,14 +5339,22 @@ static inline void mte_destroy_walk(struct maple_enode *enode, static void mas_wr_store_setup(struct ma_wr_state *wr_mas) { - if (mas_is_start(wr_mas->mas)) - return; + if (!mas_is_active(wr_mas->mas)) { + if (mas_is_start(wr_mas->mas)) + return; - if (unlikely(mas_is_paused(wr_mas->mas))) - goto reset; + if (unlikely(mas_is_paused(wr_mas->mas))) + goto reset; - if (unlikely(mas_is_none(wr_mas->mas))) - goto reset; + if (unlikely(mas_is_none(wr_mas->mas))) + goto reset; + + if (unlikely(mas_is_overflow(wr_mas->mas))) + goto reset; + + if (unlikely(mas_is_underflow(wr_mas->mas))) + goto reset; + } /* * A less strict version of mas_is_span_wr() where we allow spanning @@ -5595,8 +5648,25 @@ static inline bool mas_next_setup(struct ma_state *mas, unsigned long max, { bool was_none = mas_is_none(mas); - if (mas_is_none(mas) || mas_is_paused(mas)) + if (unlikely(mas->last >= max)) { + mas->node = MAS_OVERFLOW; + return true; + } + + if (mas_is_active(mas)) + return false; + + if (mas_is_none(mas) || mas_is_paused(mas)) { + mas->node = MAS_START; + } else if (mas_is_overflow(mas)) { + /* Overflowed before, but the max changed */ mas->node = MAS_START; + } else if (mas_is_underflow(mas)) { + mas->node = MAS_START; + *entry = mas_walk(mas); + if (*entry) + return true; + } if (mas_is_start(mas)) *entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ @@ -5615,6 +5685,7 @@ static inline bool mas_next_setup(struct 
ma_state *mas, unsigned long max, if (mas_is_none(mas)) return true; + return false; } @@ -5637,7 +5708,7 @@ void *mas_next(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, false); + return mas_next_slot(mas, max, false, true); } EXPORT_SYMBOL_GPL(mas_next); @@ -5660,7 +5731,7 @@ void *mas_next_range(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, true); + return mas_next_slot(mas, max, true, true); } EXPORT_SYMBOL_GPL(mas_next_range); @@ -5691,18 +5762,31 @@ EXPORT_SYMBOL_GPL(mt_next); static inline bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) { - if (mas->index <= min) - goto none; + if (unlikely(mas->index <= min)) { + mas->node = MAS_UNDERFLOW; + return true; + } - if (mas_is_none(mas) || mas_is_paused(mas)) + if (mas_is_active(mas)) + return false; + + if (mas_is_overflow(mas)) { mas->node = MAS_START; + *entry = mas_walk(mas); + if (*entry) + return true; + } - if (mas_is_start(mas)) { - mas_walk(mas); - if (!mas->index) - goto none; + if (mas_is_none(mas) || mas_is_paused(mas)) { + mas->node = MAS_START; + } else if (mas_is_underflow(mas)) { + /* underflowed before but the min changed */ + mas->node = MAS_START; } + if (mas_is_start(mas)) + mas_walk(mas); + if (unlikely(mas_is_ptr(mas))) { if (!mas->index) goto none; @@ -5747,7 +5831,7 @@ void *mas_prev(struct ma_state *mas, unsigned long min) if (mas_prev_setup(mas, min, &entry)) return entry; - return mas_prev_slot(mas, min, false); + return mas_prev_slot(mas, min, false, true); } EXPORT_SYMBOL_GPL(mas_prev); @@ -5770,7 +5854,7 @@ void *mas_prev_range(struct ma_state *mas, unsigned long min) if (mas_prev_setup(mas, min, &entry)) return entry; - return mas_prev_slot(mas, min, true); + return mas_prev_slot(mas, min, true, true); } EXPORT_SYMBOL_GPL(mas_prev_range); @@ -5828,24 +5912,35 @@ 
EXPORT_SYMBOL_GPL(mas_pause); static inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) { - *entry = NULL; + if (mas_is_active(mas)) { + if (mas->last < max) + return false; - if (unlikely(mas_is_none(mas))) { + return true; + } + + if (mas_is_paused(mas)) { if (unlikely(mas->last >= max)) return true; - mas->index = mas->last; + mas->index = ++mas->last; mas->node = MAS_START; - } else if (unlikely(mas_is_paused(mas))) { + } else if (mas_is_none(mas)) { if (unlikely(mas->last >= max)) return true; + mas->index = mas->last; mas->node = MAS_START; - mas->index = ++mas->last; - } else if (unlikely(mas_is_ptr(mas))) - goto ptr_out_of_range; + } else if (mas_is_overflow(mas) || mas_is_underflow(mas)) { + if (mas->index > max) { + mas->node = MAS_OVERFLOW; + return true; + } + + mas->node = MAS_START; + } - if (unlikely(mas_is_start(mas))) { + if (mas_is_start(mas)) { /* First run or continue */ if (mas->index > max) return true; @@ -5895,7 +5990,7 @@ void *mas_find(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, false); + return mas_next_slot(mas, max, false, false); } EXPORT_SYMBOL_GPL(mas_find); @@ -5913,13 +6008,13 @@ EXPORT_SYMBOL_GPL(mas_find); */ void *mas_find_range(struct ma_state *mas, unsigned long max) { - void *entry; + void *entry = NULL; if (mas_find_setup(mas, max, &entry)) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, true); + return mas_next_slot(mas, max, true, false); } EXPORT_SYMBOL_GPL(mas_find_range); @@ -5934,26 +6029,36 @@ EXPORT_SYMBOL_GPL(mas_find_range); static inline bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, void **entry) { - *entry = NULL; - - if (unlikely(mas_is_none(mas))) { - if (mas->index <= min) - goto none; + if (mas_is_active(mas)) { + if (mas->index > min) + return false; - mas->last = mas->index; - mas->node = MAS_START; + return 
true; } - if (unlikely(mas_is_paused(mas))) { + if (mas_is_paused(mas)) { if (unlikely(mas->index <= min)) { mas->node = MAS_NONE; return true; } mas->node = MAS_START; mas->last = --mas->index; + } else if (mas_is_none(mas)) { + if (mas->index <= min) + goto none; + + mas->last = mas->index; + mas->node = MAS_START; + } else if (mas_is_underflow(mas) || mas_is_overflow(mas)) { + if (mas->last <= min) { + mas->node = MAS_UNDERFLOW; + return true; + } + + mas->node = MAS_START; } - if (unlikely(mas_is_start(mas))) { + if (mas_is_start(mas)) { /* First run or continue */ if (mas->index < min) return true; @@ -6004,13 +6109,13 @@ none: */ void *mas_find_rev(struct ma_state *mas, unsigned long min) { - void *entry; + void *entry = NULL; if (mas_find_rev_setup(mas, min, &entry)) return entry; /* Retries on dead nodes handled by mas_prev_slot */ - return mas_prev_slot(mas, min, false); + return mas_prev_slot(mas, min, false, false); } EXPORT_SYMBOL_GPL(mas_find_rev); @@ -6030,13 +6135,13 @@ EXPORT_SYMBOL_GPL(mas_find_rev); */ void *mas_find_range_rev(struct ma_state *mas, unsigned long min) { - void *entry; + void *entry = NULL; if (mas_find_rev_setup(mas, min, &entry)) return entry; /* Retries on dead nodes handled by mas_prev_slot */ - return mas_prev_slot(mas, min, true); + return mas_prev_slot(mas, min, true, false); } EXPORT_SYMBOL_GPL(mas_find_range_rev); diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 0674aebd4423..06959165e2f9 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -2166,7 +2166,7 @@ static noinline void __init next_prev_test(struct maple_tree *mt) MT_BUG_ON(mt, val != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 5); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); mas.index = 0; mas.last = 5; @@ -2917,6 +2917,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * exists MAS_NONE active range * exists active active range * DNE active active 
set to last range + * ERANGE active MAS_OVERFLOW last range * * Function ENTRY Start Result index & last * mas_prev() @@ -2945,6 +2946,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * any MAS_ROOT MAS_NONE 0 * exists active active range * DNE active active last range + * ERANGE active MAS_UNDERFLOW last range * * Function ENTRY Start Result index & last * mas_find() @@ -2955,7 +2957,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE MAS_START MAS_NONE 0 * DNE MAS_PAUSE MAS_NONE 0 * DNE MAS_ROOT MAS_NONE 0 - * DNE MAS_NONE MAS_NONE 0 + * DNE MAS_NONE MAS_NONE 1 * if index == 0 * exists MAS_START MAS_ROOT 0 * exists MAS_PAUSE MAS_ROOT 0 @@ -2967,7 +2969,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE MAS_START active set to max * exists MAS_PAUSE active range * DNE MAS_PAUSE active set to max - * exists MAS_NONE active range + * exists MAS_NONE active range (start at last) * exists active active range * DNE active active last range (max < last) * @@ -2992,7 +2994,7 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE MAS_START active set to min * exists MAS_PAUSE active range * DNE MAS_PAUSE active set to min - * exists MAS_NONE active range + * exists MAS_NONE active range (start at index) * exists active active range * DNE active active last range (min > index) * @@ -3039,10 +3041,10 @@ static noinline void __init check_state_handling(struct maple_tree *mt) mtree_store_range(mt, 0, 0, ptr, GFP_KERNEL); mas_lock(&mas); - /* prev: Start -> none */ + /* prev: Start -> underflow*/ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != NULL); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); /* prev: Start -> root */ mas_set(&mas, 10); @@ -3069,7 +3071,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.node != MAS_NONE); - /* next: start -> 
none */ + /* next: start -> none*/ mas_set(&mas, 10); entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, mas.index != 1); @@ -3268,25 +3270,46 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x2500); MT_BUG_ON(mt, !mas_active(mas)); - /* next:active -> active out of range*/ + /* next:active -> active beyond data */ entry = mas_next(&mas, 0x2999); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x2501); MT_BUG_ON(mt, mas.last != 0x2fff); MT_BUG_ON(mt, !mas_active(mas)); - /* Continue after out of range*/ + /* Continue after last range ends after max */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); MT_BUG_ON(mt, mas.last != 0x3500); MT_BUG_ON(mt, !mas_active(mas)); - /* next:active -> active out of range*/ + /* next:active -> active continued */ + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x3501); + MT_BUG_ON(mt, mas.last != ULONG_MAX); + MT_BUG_ON(mt, !mas_active(mas)); + + /* next:active -> overflow */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x3501); MT_BUG_ON(mt, mas.last != ULONG_MAX); + MT_BUG_ON(mt, mas.node != MAS_OVERFLOW); + + /* next:overflow -> overflow */ + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x3501); + MT_BUG_ON(mt, mas.last != ULONG_MAX); + MT_BUG_ON(mt, mas.node != MAS_OVERFLOW); + + /* prev:overflow -> active */ + entry = mas_prev(&mas, 0); + MT_BUG_ON(mt, entry != ptr3); + MT_BUG_ON(mt, mas.index != 0x3000); + MT_BUG_ON(mt, mas.last != 0x3500); MT_BUG_ON(mt, !mas_active(mas)); /* next: none -> active, skip value at location */ @@ -3307,11 +3330,46 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x1500); MT_BUG_ON(mt, !mas_active(mas)); - /* prev:active -> active out of range*/ + /* prev:active -> active spanning end range 
*/ + entry = mas_prev(&mas, 0x0100); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != 0x0FFF); + MT_BUG_ON(mt, !mas_active(mas)); + + /* prev:active -> underflow */ + entry = mas_prev(&mas, 0); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0); + MT_BUG_ON(mt, mas.last != 0x0FFF); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + + /* prev:underflow -> underflow */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0x0FFF); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + + /* next:underflow -> active */ + entry = mas_next(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != ptr); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); + MT_BUG_ON(mt, !mas_active(mas)); + + /* prev:first value -> underflow */ + entry = mas_prev(&mas, 0x1000); + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); + MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + + /* find:underflow -> first value */ + entry = mas_find(&mas, ULONG_MAX); + MT_BUG_ON(mt, entry != ptr); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); MT_BUG_ON(mt, !mas_active(mas)); /* prev: pause ->active */ @@ -3325,14 +3383,14 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x2500); MT_BUG_ON(mt, !mas_active(mas)); - /* prev:active -> active out of range*/ + /* prev:active -> active spanning min */ entry = mas_prev(&mas, 0x1600); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x1501); MT_BUG_ON(mt, mas.last != 0x1FFF); MT_BUG_ON(mt, !mas_active(mas)); - /* prev: active ->active, continue*/ + /* prev: active ->active, continue */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); @@ -3379,7 +3437,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x2FFF); MT_BUG_ON(mt, 
!mas_active(mas)); - /* find: none ->active */ + /* find: overflow ->active */ entry = mas_find(&mas, 0x5000); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); @@ -3778,7 +3836,6 @@ static int __init maple_tree_seed(void) check_empty_area_fill(&tree); mtree_destroy(&tree); - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_state_handling(&tree); mtree_destroy(&tree); -- cgit v1.2.3 From 935d4f0c6dc8b3533e6e39346de7389a84490178 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Fri, 22 Sep 2023 12:58:03 +0100 Subject: mm: hugetlb: add huge page size param to set_huge_pte_at() Patch series "Fix set_huge_pte_at() panic on arm64", v2. This series fixes a bug in arm64's implementation of set_huge_pte_at(), which can result in an unprivileged user causing a kernel panic. The problem was triggered when running the new uffd poison mm selftest for HUGETLB memory. This test (and the uffd poison feature) was merged for v6.5-rc7. Ideally, I'd like to get this fix in for v6.6 and I've cc'ed stable (correctly this time) to get it backported to v6.5, where the issue first showed up. Description of Bug ================== arm64's huge pte implementation supports multiple huge page sizes, some of which are implemented in the page table with multiple contiguous entries. So set_huge_pte_at() needs to work out how big the logical pte is, so that it can also work out how many physical ptes (or pmds) need to be written. It previously did this by grabbing the folio out of the pte and querying its size. However, there are cases when the pte being set is actually a swap entry. But this also used to work fine, because for huge ptes, we only ever saw migration entries and hwpoison entries. And both of these types of swap entries have a PFN embedded, so the code would grab that and everything still worked out. But over time, more calls to set_huge_pte_at() have been added that set swap entry types that do not embed a PFN. And this causes the code to go bang. 
The triggering case is for the uffd poison test, commit 99aa77215ad0 ("selftests/mm: add uffd unit test for UFFDIO_POISON"), which causes a PTE_MARKER_POISONED swap entry to be set, courtesy of commit 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs") - added in v6.5-rc7. Although review shows that there are other call sites that set PTE_MARKER_UFFD_WP (which also has no PFN), these don't trigger on arm64 because arm64 doesn't support UFFD WP. If CONFIG_DEBUG_VM is enabled, we do at least get a BUG(), but otherwise, it will dereference a bad pointer in page_folio(): static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry) { VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry)); return page_folio(pfn_to_page(swp_offset_pfn(entry))); } Fix === The simplest fix would have been to revert the dodgy cleanup commit 18f3962953e4 ("mm: hugetlb: kill set_huge_swap_pte_at()"), but since things have moved on, this would have required an audit of all the new set_huge_pte_at() call sites to see if they should be converted to set_huge_swap_pte_at(). As per the original intent of the change, it would also leave us open to future bugs when people invariably get it wrong and call the wrong helper. So instead, I've added a huge page size parameter to set_huge_pte_at(). This means that the arm64 code has the size in all cases. It's a bigger change, due to needing to touch the arches that implement the function, but it is entirely mechanical, so in my view, low risk. I've compile-tested all touched arches; arm64, parisc, powerpc, riscv, s390, sparc (and additionally x86_64). I've additionally booted and run mm selftests against arm64, where I observe the uffd poison test is fixed, and there are no other regressions. This patch (of 2): In order to fix a bug, arm64 needs to be told the size of the huge page for which the pte is being set in set_huge_pte_at(). Provide for this by adding an `unsigned long sz` parameter to the function.
This follows the same pattern as huge_pte_clear(). This commit makes the required interface modifications to the core mm as well as all arches that implement this function (arm64, parisc, powerpc, riscv, s390, sparc). The actual arm64 bug will be fixed in a separate commit. No behavioral changes intended. Link: https://lkml.kernel.org/r/20230922115804.2043771-1-ryan.roberts@arm.com Link: https://lkml.kernel.org/r/20230922115804.2043771-2-ryan.roberts@arm.com Fixes: 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs") Signed-off-by: Ryan Roberts Reviewed-by: Christophe Leroy [powerpc 8xx] Reviewed-by: Lorenzo Stoakes [vmalloc change] Cc: Alexandre Ghiti Cc: Albert Ou Cc: Alexander Gordeev Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Axel Rasmussen Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: David S. Miller Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Mike Kravetz Cc: Muchun Song Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Xu Cc: Qi Zheng Cc: Ryan Roberts Cc: SeongJae Park Cc: Sven Schnelle Cc: Uladzislau Rezki (Sony) Cc: Vasily Gorbik Cc: Will Deacon Cc: [6.5+] Signed-off-by: Andrew Morton --- arch/arm64/include/asm/hugetlb.h | 2 +- arch/arm64/mm/hugetlbpage.c | 6 ++-- arch/parisc/include/asm/hugetlb.h | 2 +- arch/parisc/mm/hugetlbpage.c | 2 +- arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 3 +- arch/powerpc/mm/book3s64/hugetlbpage.c | 5 ++- arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 3 +- arch/powerpc/mm/nohash/8xx.c | 3 +- arch/powerpc/mm/pgtable.c | 3 +- arch/riscv/include/asm/hugetlb.h | 3 +- arch/riscv/mm/hugetlbpage.c | 3 +- arch/s390/include/asm/hugetlb.h | 6 ++-- arch/s390/mm/hugetlbpage.c | 8 ++++- arch/sparc/include/asm/hugetlb.h | 6 ++-- arch/sparc/mm/hugetlbpage.c | 8 ++++- include/asm-generic/hugetlb.h | 2 +- include/linux/hugetlb.h | 6 ++-- mm/damon/vaddr.c | 3 +- mm/hugetlb.c | 43 +++++++++++++----------- mm/migrate.c | 7 ++-- mm/rmap.c 
| 23 ++++++++++--- mm/vmalloc.c | 2 +- 22 files changed, 100 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index f43a38ac1779..2ddc33d93b13 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -28,7 +28,7 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); #define arch_make_huge_pte arch_make_huge_pte #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 9c52718ea750..a7f8c8db3425 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -249,7 +249,7 @@ static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry) } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { size_t pgsize; int i; @@ -571,5 +571,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h index f7f078c2872c..72daacc472a0 100644 --- a/arch/parisc/include/asm/hugetlb.h +++ b/arch/parisc/include/asm/hugetlb.h @@ -6,7 +6,7 @@ #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte, unsigned 
long sz); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index a8a1a7c1e16e..a9f7e21f6656 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -140,7 +140,7 @@ static void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, unsigned long sz) { __set_huge_pte_at(mm, addr, ptep, entry); } diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index de092b04ee1a..92df40c6cc6b 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -46,7 +46,8 @@ static inline int check_and_get_huge_psize(int shift) } #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte, unsigned long sz); #define __HAVE_ARCH_HUGE_PTE_CLEAR static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c index 3bc0eb21b2a0..5a2e512e96db 100644 --- a/arch/powerpc/mm/book3s64/hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -143,11 +143,14 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { + unsigned long psize; if (radix_enabled()) return radix__huge_ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte); - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + + psize = huge_page_size(hstate_vma(vma)); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } void __init 
hugetlbpage_init_defaultsize(void) diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index 17075c78d4bc..35fd2a95be24 100644 --- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c @@ -47,6 +47,7 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, pte_t old_pte, pte_t pte) { struct mm_struct *mm = vma->vm_mm; + unsigned long psize = huge_page_size(hstate_vma(vma)); /* * POWER9 NMMU must flush the TLB after clearing the PTE before @@ -58,5 +59,5 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, atomic_read(&mm->context.copros) > 0) radix__flush_hugetlb_page(vma, addr); - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index dbbfe897455d..a642a7929892 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -91,7 +91,8 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot))) return -EINVAL; - set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot))); + set_huge_pte_at(&init_mm, va, ptep, + pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)), psize); return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 3f86fd217690..3ba9fe411604 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -288,7 +288,8 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, } #if defined(CONFIG_PPC_8xx) -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte, unsigned long sz) { pmd_t *pmd = pmd_off(mm, addr); pte_basic_t val; diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index 
34e24f078cc1..4c5b0e929890 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -18,7 +18,8 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pte); + unsigned long addr, pte_t *ptep, pte_t pte, + unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 96225a8533ad..e4a2ace92dbe 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -180,7 +180,8 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, - pte_t pte) + pte_t pte, + unsigned long sz) { int i, pte_num; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index f07267875a19..deb198a61039 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -16,6 +16,8 @@ #define hugepages_supported() (MACHINE_HAS_EDAT1) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz); +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); pte_t huge_ptep_get(pte_t *ptep); pte_t huge_ptep_get_and_clear(struct mm_struct *mm, @@ -65,7 +67,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, int changed = !pte_same(huge_ptep_get(ptep), pte); if (changed) { huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } return changed; } @@ -74,7 +76,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); - set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); 
+ __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index c718f2a0de94..297a6d897d5a 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -142,7 +142,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) __storage_key_init_range(paddr, paddr + size - 1); } -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { unsigned long rste; @@ -163,6 +163,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, __pte(rste)); } +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz) +{ + __set_huge_pte_at(mm, addr, ptep, pte); +} + pte_t huge_ptep_get(pte_t *ptep) { return __rste_to_pte(pte_val(*ptep)); diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 0a26cca24232..c714ca6a05aa 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -14,6 +14,8 @@ extern struct pud_huge_patch_entry __pud_huge_patch, __pud_huge_patch_end; #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned long sz); +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR @@ -32,7 +34,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t old_pte = *ptep; - set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); } #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS @@ -42,7 +44,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, { int changed = !pte_same(*ptep, pte); if (changed) 
{ - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); flush_tlb_page(vma, addr); } return changed; diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index d7018823206c..b432500c13a5 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -328,7 +328,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return pte_offset_huge(pmd, addr); } -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { unsigned int nptes, orig_shift, shift; @@ -364,6 +364,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, orig_shift); } +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry, unsigned long sz) +{ + __set_huge_pte_at(mm, addr, ptep, entry); +} + pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 4da02798a00b..6dcf4d576970 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -76,7 +76,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, #ifndef __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { set_pte_at(mm, addr, ptep, pte); } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5b2626063f4f..a30686e649f7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -984,7 +984,9 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { - set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize); } #endif @@ -1173,7 +1175,7 @@ static inline pte_t 
huge_ptep_clear_flush(struct vm_area_struct *vma, } static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + pte_t *ptep, pte_t pte, unsigned long sz) { } diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 4c81a9dbd044..cf8a9fc5c9d1 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -341,13 +341,14 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, bool referenced = false; pte_t entry = huge_ptep_get(pte); struct folio *folio = pfn_folio(pte_pfn(entry)); + unsigned long psize = huge_page_size(hstate_vma(vma)); folio_get(folio); if (pte_young(entry)) { referenced = true; entry = pte_mkold(entry); - set_huge_pte_at(mm, addr, pte, entry); + set_huge_pte_at(mm, addr, pte, entry, psize); } #ifdef CONFIG_MMU_NOTIFIER diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ba6d39b71cb1..52d26072dfda 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4980,7 +4980,7 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte) static void hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr, - struct folio *new_folio, pte_t old) + struct folio *new_folio, pte_t old, unsigned long sz) { pte_t newpte = make_huge_pte(vma, &new_folio->page, 1); @@ -4988,7 +4988,7 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add hugepage_add_new_anon_rmap(new_folio, vma, addr); if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old)) newpte = huge_pte_mkuffd_wp(newpte); - set_huge_pte_at(vma->vm_mm, addr, ptep, newpte); + set_huge_pte_at(vma->vm_mm, addr, ptep, newpte, sz); hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); folio_set_hugetlb_migratable(new_folio); } @@ -5065,7 +5065,7 @@ again: } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); - set_huge_pte_at(dst, addr, dst_pte, entry); + set_huge_pte_at(dst, addr, dst_pte, entry, sz); } else if 
(unlikely(is_hugetlb_entry_migration(entry))) { swp_entry_t swp_entry = pte_to_swp_entry(entry); bool uffd_wp = pte_swp_uffd_wp(entry); @@ -5080,18 +5080,18 @@ again: entry = swp_entry_to_pte(swp_entry); if (userfaultfd_wp(src_vma) && uffd_wp) entry = pte_swp_mkuffd_wp(entry); - set_huge_pte_at(src, addr, src_pte, entry); + set_huge_pte_at(src, addr, src_pte, entry, sz); } if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); - set_huge_pte_at(dst, addr, dst_pte, entry); + set_huge_pte_at(dst, addr, dst_pte, entry, sz); } else if (unlikely(is_pte_marker(entry))) { pte_marker marker = copy_pte_marker( pte_to_swp_entry(entry), dst_vma); if (marker) set_huge_pte_at(dst, addr, dst_pte, - make_pte_marker(marker)); + make_pte_marker(marker), sz); } else { entry = huge_ptep_get(src_pte); pte_folio = page_folio(pte_page(entry)); @@ -5145,7 +5145,7 @@ again: goto again; } hugetlb_install_folio(dst_vma, dst_pte, addr, - new_folio, src_pte_old); + new_folio, src_pte_old, sz); spin_unlock(src_ptl); spin_unlock(dst_ptl); continue; @@ -5166,7 +5166,7 @@ again: if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); - set_huge_pte_at(dst, addr, dst_pte, entry); + set_huge_pte_at(dst, addr, dst_pte, entry, sz); hugetlb_count_add(npages, dst); } spin_unlock(src_ptl); @@ -5184,7 +5184,8 @@ again: } static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, - unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte) + unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte, + unsigned long sz) { struct hstate *h = hstate_vma(vma); struct mm_struct *mm = vma->vm_mm; @@ -5202,7 +5203,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); pte = huge_ptep_get_and_clear(mm, old_addr, src_pte); - set_huge_pte_at(mm, new_addr, dst_pte, pte); + set_huge_pte_at(mm, new_addr, dst_pte, pte, sz); if (src_ptl != dst_ptl) spin_unlock(src_ptl); @@ -5259,7 +5260,7 @@ int 
move_hugetlb_page_tables(struct vm_area_struct *vma, if (!dst_pte) break; - move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte); + move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte, sz); } if (shared_pmd) @@ -5337,7 +5338,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct if (pte_swp_uffd_wp_any(pte) && !(zap_flags & ZAP_FLAG_DROP_MARKER)) set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP)); + make_pte_marker(PTE_MARKER_UFFD_WP), + sz); else huge_pte_clear(mm, address, ptep, sz); spin_unlock(ptl); @@ -5371,7 +5373,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct if (huge_pte_uffd_wp(pte) && !(zap_flags & ZAP_FLAG_DROP_MARKER)) set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP)); + make_pte_marker(PTE_MARKER_UFFD_WP), + sz); hugetlb_count_sub(pages_per_huge_page(h), mm); page_remove_rmap(page, vma, true); @@ -5676,7 +5679,7 @@ retry_avoidcopy: hugepage_add_new_anon_rmap(new_folio, vma, haddr); if (huge_pte_uffd_wp(pte)) newpte = huge_pte_mkuffd_wp(newpte); - set_huge_pte_at(mm, haddr, ptep, newpte); + set_huge_pte_at(mm, haddr, ptep, newpte, huge_page_size(h)); folio_set_hugetlb_migratable(new_folio); /* Make the old page be freed below */ new_folio = old_folio; @@ -5972,7 +5975,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, */ if (unlikely(pte_marker_uffd_wp(old_pte))) new_pte = huge_pte_mkuffd_wp(new_pte); - set_huge_pte_at(mm, haddr, ptep, new_pte); + set_huge_pte_at(mm, haddr, ptep, new_pte, huge_page_size(h)); hugetlb_count_add(pages_per_huge_page(h), mm); if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { @@ -6261,7 +6264,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, } _dst_pte = make_pte_marker(PTE_MARKER_POISONED); - set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, + huge_page_size(h)); /* No need to invalidate - it was non-present before */ 
update_mmu_cache(dst_vma, dst_addr, dst_pte); @@ -6412,7 +6416,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, if (wp_enabled) _dst_pte = huge_pte_mkuffd_wp(_dst_pte); - set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, huge_page_size(h)); hugetlb_count_add(pages_per_huge_page(h), dst_mm); @@ -6598,7 +6602,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, else if (uffd_wp_resolve) newpte = pte_swp_clear_uffd_wp(newpte); if (!pte_same(pte, newpte)) - set_huge_pte_at(mm, address, ptep, newpte); + set_huge_pte_at(mm, address, ptep, newpte, psize); } else if (unlikely(is_pte_marker(pte))) { /* No other markers apply for now. */ WARN_ON_ONCE(!pte_marker_uffd_wp(pte)); @@ -6623,7 +6627,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma, if (unlikely(uffd_wp)) /* Safe to modify directly (none->non-present). */ set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP)); + make_pte_marker(PTE_MARKER_UFFD_WP), + psize); } spin_unlock(ptl); } diff --git a/mm/migrate.c b/mm/migrate.c index b7fa020003f3..2053b54556ca 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -243,7 +243,9 @@ static bool remove_migration_pte(struct folio *folio, #ifdef CONFIG_HUGETLB_PAGE if (folio_test_hugetlb(folio)) { - unsigned int shift = huge_page_shift(hstate_vma(vma)); + struct hstate *h = hstate_vma(vma); + unsigned int shift = huge_page_shift(h); + unsigned long psize = huge_page_size(h); pte = arch_make_huge_pte(pte, shift, vma->vm_flags); if (folio_test_anon(folio)) @@ -251,7 +253,8 @@ static bool remove_migration_pte(struct folio *folio, rmap_flags); else page_dup_file_rmap(new, true); - set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte); + set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte, + psize); } else #endif { diff --git a/mm/rmap.c b/mm/rmap.c index ec7f8e6c9e48..9f795b93cf40 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1480,6 +1480,7 @@ static bool try_to_unmap_one(struct 
folio *folio, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; + unsigned long hsz = 0; /* * When racing against e.g. zap_pte_range() on another cpu, @@ -1511,6 +1512,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); + + /* We need the huge page size for set_huge_pte_at() */ + hsz = huge_page_size(hstate_vma(vma)); } mmu_notifier_invalidate_range_start(&range); @@ -1628,7 +1632,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, pteval, + hsz); } else { dec_mm_counter(mm, mm_counter(&folio->page)); set_pte_at(mm, address, pvmw.pte, pteval); @@ -1820,6 +1825,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; unsigned long pfn; + unsigned long hsz = 0; /* * When racing against e.g. 
zap_pte_range() on another cpu, @@ -1855,6 +1861,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); + + /* We need the huge page size for set_huge_pte_at() */ + hsz = huge_page_size(hstate_vma(vma)); } mmu_notifier_invalidate_range_start(&range); @@ -2020,7 +2029,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, pteval, + hsz); } else { dec_mm_counter(mm, mm_counter(&folio->page)); set_pte_at(mm, address, pvmw.pte, pteval); @@ -2044,7 +2054,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (arch_unmap_one(mm, vma, address, pteval) < 0) { if (folio_test_hugetlb(folio)) - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, + pteval, hsz); else set_pte_at(mm, address, pvmw.pte, pteval); ret = false; @@ -2058,7 +2069,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (anon_exclusive && page_try_share_anon_rmap(subpage)) { if (folio_test_hugetlb(folio)) - set_huge_pte_at(mm, address, pvmw.pte, pteval); + set_huge_pte_at(mm, address, pvmw.pte, + pteval, hsz); else set_pte_at(mm, address, pvmw.pte, pteval); ret = false; @@ -2090,7 +2102,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (pte_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); if (folio_test_hugetlb(folio)) - set_huge_pte_at(mm, address, pvmw.pte, swp_pte); + set_huge_pte_at(mm, address, pvmw.pte, swp_pte, + hsz); else set_pte_at(mm, address, pvmw.pte, swp_pte); trace_set_migration_pte(address, pte_val(swp_pte), diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ef8599d394fd..a3fedb3ee0db 100644 --- 
a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -111,7 +111,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t entry = pfn_pte(pfn, prot); entry = arch_make_huge_pte(entry, ilog2(size), 0); - set_huge_pte_at(&init_mm, addr, pte, entry); + set_huge_pte_at(&init_mm, addr, pte, entry, size); pfn += PFN_DOWN(size); continue; } -- cgit v1.2.3 From 348cbf987ed328682af7d74ce98d9f0e6857f42f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 30 Sep 2023 09:20:20 -0700 Subject: Input: mt - annotate struct input_mt with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct input_mt. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R.
Silva Link: https://lore.kernel.org/r/20230922175036.work.762-kees@kernel.org Signed-off-by: Dmitry Torokhov --- include/linux/input/mt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 3b8580bd33c1..2cf89a538b18 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -47,7 +47,7 @@ struct input_mt { unsigned int flags; unsigned int frame; int *red; - struct input_mt_slot slots[]; + struct input_mt_slot slots[] __counted_by(num_slots); }; static inline void input_mt_set_value(struct input_mt_slot *slot, -- cgit v1.2.3 From fce9c967820a72f600abbf061d7077861685a14d Mon Sep 17 00:00:00 2001 From: Ingo Rohloff Date: Sat, 26 Aug 2023 22:02:41 +0200 Subject: wifi: mt76: mt7921e: Support MT7992 IP in Xiaomi Redmibook 15 Pro (2023) In the Xiaomi Redmibook 15 Pro (2023) laptop I have got, a wifi chip is used, which according to its PCI Vendor ID is from "ITTIM Technology". This chip works flawlessly with the mt7921e module. The driver doesn't bind to this PCI device, because the Vendor ID from "ITTIM Technology" is not recognized. This patch adds the PCI Vendor ID from "ITTIM Technology" to the list of PCI Vendor IDs and lets the mt7921e driver bind to the mentioned wifi chip. 
Signed-off-by: Ingo Rohloff Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 2 ++ include/linux/pci_ids.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 3dda84a93717..f04e7095e181 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -17,6 +17,8 @@ static const struct pci_device_id mt7921_pci_device_table[] = { .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM }, { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7922), .driver_data = (kernel_ulong_t)MT7922_FIRMWARE_WM }, + { PCI_DEVICE(PCI_VENDOR_ID_ITTIM, 0x7922), + .driver_data = (kernel_ulong_t)MT7922_FIRMWARE_WM }, { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0608), .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM }, { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0616), diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2dc75df1437f..6ae1803bcd2f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -180,6 +180,8 @@ #define PCI_DEVICE_ID_BERKOM_A4T 0xffa4 #define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8 +#define PCI_VENDOR_ID_ITTIM 0x0b48 + #define PCI_VENDOR_ID_COMPAQ 0x0e11 #define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508 #define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc -- cgit v1.2.3 From 0b068c714ca9479d2783cc333fff5bc2d4a6d45c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 08:52:16 +0000 Subject: net: add DEV_STATS_READ() helper Companion of DEV_STATS_INC() & DEV_STATS_ADD(). This is going to be used in the series. Use it in macsec_get_stats64(). Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/macsec.c | 6 +++--- include/linux/netdevice.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index b7e151439c48..7a44e1cbe305 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3655,9 +3655,9 @@ static void macsec_get_stats64(struct net_device *dev, dev_fetch_sw_netstats(s, dev->tstats); - s->rx_dropped = atomic_long_read(&dev->stats.__rx_dropped); - s->tx_dropped = atomic_long_read(&dev->stats.__tx_dropped); - s->rx_errors = atomic_long_read(&dev->stats.__rx_errors); + s->rx_dropped = DEV_STATS_READ(dev, rx_dropped); + s->tx_dropped = DEV_STATS_READ(dev, tx_dropped); + s->rx_errors = DEV_STATS_READ(dev, rx_errors); } static int macsec_get_iflink(const struct net_device *dev) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e520c14eb8c..e070a4540fba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5236,5 +5236,6 @@ extern struct net_device *blackhole_netdev; #define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) #define DEV_STATS_ADD(DEV, FIELD, VAL) \ atomic_long_add((VAL), &(DEV)->stats.__##FIELD) +#define DEV_STATS_READ(DEV, FIELD) atomic_long_read(&(DEV)->stats.__##FIELD) #endif /* _LINUX_NETDEVICE_H */ -- cgit v1.2.3 From fa4c4507099f781ca89a748c480af9cf97629726 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 28 Sep 2023 16:31:35 +0200 Subject: iommu: Allow .iotlb_sync_map to fail and handle s390's -ENOMEM return On s390 when using a paging hypervisor, .iotlb_sync_map is used to sync mappings by letting the hypervisor inspect the synced IOVA range and updating a shadow table. This however means that .iotlb_sync_map can fail as the hypervisor may run out of resources while doing the sync. 
This can be due to the hypervisor being unable to pin guest pages, due to a limit on mapped addresses such as vfio_iommu_type1.dma_entry_limit or lack of other resources. Either way such a failure to sync a mapping should result in a DMA_MAPPING_ERROR. Now especially when running with batched IOTLB flushes for unmap it may be that some IOVAs have already been invalidated but not yet synced via .iotlb_sync_map. Thus if the hypervisor indicates running out of resources, first do a global flush allowing the hypervisor to free resources associated with these mappings as well as retry creating the new mappings and only if that also fails report this error to callers. Reviewed-by: Lu Baolu Reviewed-by: Matthew Rosato Acked-by: Jernej Skrabec # sun50i Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20230928-dma_iommu-v13-1-9e5fc4dacc36@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 5 +++-- drivers/iommu/apple-dart.c | 5 +++-- drivers/iommu/intel/iommu.c | 5 +++-- drivers/iommu/iommu.c | 20 ++++++++++++++++---- drivers/iommu/msm_iommu.c | 5 +++-- drivers/iommu/mtk_iommu.c | 5 +++-- drivers/iommu/s390-iommu.c | 29 +++++++++++++++++++++++------ drivers/iommu/sprd-iommu.c | 5 +++-- drivers/iommu/sun50i-iommu.c | 6 ++++-- include/linux/iommu.h | 4 ++-- 10 files changed, 63 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 95bd7c25ba6f..74229b1607a1 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2233,14 +2233,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, return ret; } -static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom, - unsigned long iova, size_t size) +static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom, + unsigned long iova, size_t size) { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; if (ops->map_pages)
domain_flush_np_cache(domain, iova, size); + return 0; } static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 2082081402d3..22880a42ccaa 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -506,10 +506,11 @@ static void apple_dart_iotlb_sync(struct iommu_domain *domain, apple_dart_domain_flush_tlb(to_dart_domain(domain)); } -static void apple_dart_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int apple_dart_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { apple_dart_domain_flush_tlb(to_dart_domain(domain)); + return 0; } static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5db283c17e0d..477937513502 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4678,8 +4678,8 @@ static bool risky_device(struct pci_dev *pdev) return false; } -static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long pages = aligned_nrpages(iova, size); @@ -4689,6 +4689,7 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, xa_for_each(&dmar_domain->iommu_array, i, info) __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); + return 0; } static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 1ecac2b5c54f..1cc9d139c860 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2585,8 +2585,17 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, return -EINVAL; ret = __iommu_map(domain, iova, paddr, size, prot, gfp); - if (ret == 0 && 
ops->iotlb_sync_map) - ops->iotlb_sync_map(domain, iova, size); + if (ret == 0 && ops->iotlb_sync_map) { + ret = ops->iotlb_sync_map(domain, iova, size); + if (ret) + goto out_err; + } + + return ret; + +out_err: + /* undo mappings already done */ + iommu_unmap(domain, iova, size); return ret; } @@ -2714,8 +2723,11 @@ next: sg = sg_next(sg); } - if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain, iova, mapped); + if (ops->iotlb_sync_map) { + ret = ops->iotlb_sync_map(domain, iova, mapped); + if (ret) + goto out_err; + } return mapped; out_err: diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index a163cee0b724..f86af9815d6f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -498,12 +498,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } -static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) +static int msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { struct msm_priv *priv = to_msm_priv(domain); __flush_iotlb_range(iova, size, SZ_4K, false, priv); + return 0; } static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 19ef50221c93..2d8e95246781 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -837,12 +837,13 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->bank); } -static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) +static int mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); mtk_iommu_tlb_flush_range_sync(iova, size, dom->bank); + return 0; } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c 
index 5695ad71d60e..560d0957f9be 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -219,6 +219,12 @@ static void s390_iommu_release_device(struct device *dev) __s390_iommu_detach_device(zdev); } +static int zpci_refresh_all(struct zpci_dev *zdev) +{ + return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); +} + static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct s390_domain *s390_domain = to_s390_domain(domain); @@ -226,8 +232,7 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { - zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, - zdev->end_dma - zdev->start_dma + 1); + zpci_refresh_all(zdev); } rcu_read_unlock(); } @@ -251,20 +256,32 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain, rcu_read_unlock(); } -static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev; + int ret = 0; rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { if (!zdev->tlb_refresh) continue; - zpci_refresh_trans((u64)zdev->fh << 32, - iova, size); + ret = zpci_refresh_trans((u64)zdev->fh << 32, + iova, size); + /* + * let the hypervisor discover invalidated entries + * allowing it to free IOVAs and unpin pages + */ + if (ret == -ENOMEM) { + ret = zpci_refresh_all(zdev); + if (ret) + break; + } } rcu_read_unlock(); + + return ret; } static int s390_iommu_validate_trans(struct s390_domain *s390_domain, diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 9c33ea6903f6..2eb9fb46703b 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -341,8 +341,8 @@ static size_t 
sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova, return size; } -static void sprd_iommu_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int sprd_iommu_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); unsigned int reg; @@ -354,6 +354,7 @@ static void sprd_iommu_sync_map(struct iommu_domain *domain, /* clear IOMMU TLB buffer after page table updated */ sprd_iommu_write(dom->sdev, reg, 0xffffffff); + return 0; } static void sprd_iommu_sync(struct iommu_domain *domain, diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 3c834854eda1..41484a5a399b 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -401,8 +401,8 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain) spin_unlock_irqrestore(&iommu->iommu_lock, flags); } -static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); struct sun50i_iommu *iommu = sun50i_domain->iommu; @@ -411,6 +411,8 @@ static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, spin_lock_irqsave(&iommu->iommu_lock, flags); sun50i_iommu_zap_range(iommu, iova, size); spin_unlock_irqrestore(&iommu->iommu_lock, flags); + + return 0; } static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 64bd20142cbe..1eb638752781 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -350,8 +350,8 @@ struct iommu_domain_ops { struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); + int (*iotlb_sync_map)(struct 
iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); -- cgit v1.2.3 From 32d5bc8b09c7cc48c511809e7c3b1755c7ecc5fa Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 28 Sep 2023 16:31:39 +0200 Subject: iommu/dma: Allow a single FQ in addition to per-CPU FQs In some virtualized environments, including s390 paged memory guests, IOTLB flushes are used to update IOMMU shadow tables. Due to this, they are much more expensive than in typical bare metal environments or non-paged s390 guests. In addition they may parallelize poorly in virtualized environments. This changes the trade off for flushing IOVAs such that minimizing the number of IOTLB flushes trumps any benefit of cheaper queuing operations or increased parallelism. In this scenario per-CPU flush queues pose several problems. Firstly per-CPU memory is often quite limited prohibiting larger queues. Secondly collecting IOVAs per-CPU but flushing via a global timeout reduces the number of IOVAs flushed for each timeout especially on s390 where PCI interrupts may not be bound to a specific CPU. Let's introduce a single flush queue mode that reuses the same queue logic but only allocates a single global queue. This mode is selected by dma-iommu if a newly introduced .shadow_on_flush flag is set in struct dev_iommu. As a first user the s390 IOMMU driver sets this flag during probe_device. With the unchanged small FQ size and timeouts this setting is worse than per-CPU queues but a follow up patch will make the FQ size and timeout variable. Together this allows the common IOVA flushing code to more closely resemble the global flush behavior used on s390's previous internal DMA API implementation.
Link: https://lore.kernel.org/all/9a466109-01c5-96b0-bf03-304123f435ee@arm.com/ Acked-by: Robin Murphy Reviewed-by: Matthew Rosato #s390 Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20230928-dma_iommu-v13-5-9e5fc4dacc36@linux.ibm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 168 ++++++++++++++++++++++++++++++++++----------- drivers/iommu/s390-iommu.c | 3 + include/linux/iommu.h | 2 + 3 files changed, 134 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4b1a88f514c9..a85ff75ad531 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -43,14 +43,26 @@ enum iommu_dma_cookie_type { IOMMU_DMA_MSI_COOKIE, }; +enum iommu_dma_queue_type { + IOMMU_DMA_OPTS_PER_CPU_QUEUE, + IOMMU_DMA_OPTS_SINGLE_QUEUE, +}; + +struct iommu_dma_options { + enum iommu_dma_queue_type qt; +}; + struct iommu_dma_cookie { enum iommu_dma_cookie_type type; union { /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ struct { struct iova_domain iovad; - - struct iova_fq __percpu *fq; /* Flush queue */ + /* Flush queue */ + union { + struct iova_fq *single_fq; + struct iova_fq __percpu *percpu_fq; + }; /* Number of TLB flushes that have been started */ atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that have been finished */ @@ -67,6 +79,8 @@ struct iommu_dma_cookie { /* Domain for flush queue callback; NULL if flush queue not in use */ struct iommu_domain *fq_domain; + /* Options for dma-iommu use */ + struct iommu_dma_options options; struct mutex mutex; }; @@ -124,7 +138,7 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq) return idx; } -static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq) { u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); unsigned int idx; @@ -145,6 +159,15 @@ static void fq_ring_free(struct iommu_dma_cookie 
*cookie, struct iova_fq *fq) } } +static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +{ + unsigned long flags; + + spin_lock_irqsave(&fq->lock, flags); + fq_ring_free_locked(cookie, fq); + spin_unlock_irqrestore(&fq->lock, flags); +} + static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) { atomic64_inc(&cookie->fq_flush_start_cnt); @@ -160,14 +183,11 @@ static void fq_flush_timeout(struct timer_list *t) atomic_set(&cookie->fq_timer_on, 0); fq_flush_iotlb(cookie); - for_each_possible_cpu(cpu) { - unsigned long flags; - struct iova_fq *fq; - - fq = per_cpu_ptr(cookie->fq, cpu); - spin_lock_irqsave(&fq->lock, flags); - fq_ring_free(cookie, fq); - spin_unlock_irqrestore(&fq->lock, flags); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) { + fq_ring_free(cookie, cookie->single_fq); + } else { + for_each_possible_cpu(cpu) + fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu)); } } @@ -188,7 +208,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie, */ smp_mb(); - fq = raw_cpu_ptr(cookie->fq); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + fq = cookie->single_fq; + else + fq = raw_cpu_ptr(cookie->percpu_fq); + spin_lock_irqsave(&fq->lock, flags); /* @@ -196,11 +220,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie, * flushed out on another CPU. This makes the fq_full() check below less * likely to be true. 
*/ - fq_ring_free(cookie, fq); + fq_ring_free_locked(cookie, fq); if (fq_full(fq)) { fq_flush_iotlb(cookie); - fq_ring_free(cookie, fq); + fq_ring_free_locked(cookie, fq); } idx = fq_ring_add(fq); @@ -219,31 +243,88 @@ static void queue_iova(struct iommu_dma_cookie *cookie, jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); } -static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) +static void iommu_dma_free_fq_single(struct iova_fq *fq) { - int cpu, idx; + int idx; - if (!cookie->fq) - return; + fq_ring_for_each(idx, fq) + put_pages_list(&fq->entries[idx].freelist); + vfree(fq); +} + +static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq) +{ + int cpu, idx; - del_timer_sync(&cookie->fq_timer); /* The IOVAs will be torn down separately, so just free our queued pages */ for_each_possible_cpu(cpu) { - struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu); + struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu); fq_ring_for_each(idx, fq) put_pages_list(&fq->entries[idx].freelist); } - free_percpu(cookie->fq); + free_percpu(percpu_fq); +} + +static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) +{ + if (!cookie->fq_domain) + return; + + del_timer_sync(&cookie->fq_timer); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + iommu_dma_free_fq_single(cookie->single_fq); + else + iommu_dma_free_fq_percpu(cookie->percpu_fq); +} + +static void iommu_dma_init_one_fq(struct iova_fq *fq) +{ + int i; + + fq->head = 0; + fq->tail = 0; + + spin_lock_init(&fq->lock); + + for (i = 0; i < IOVA_FQ_SIZE; i++) + INIT_LIST_HEAD(&fq->entries[i].freelist); +} + +static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie) +{ + struct iova_fq *queue; + + queue = vmalloc(sizeof(*queue)); + if (!queue) + return -ENOMEM; + iommu_dma_init_one_fq(queue); + cookie->single_fq = queue; + + return 0; +} + +static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie) +{ + struct iova_fq __percpu *queue; + int cpu; + + queue = alloc_percpu(struct iova_fq); + 
if (!queue) + return -ENOMEM; + + for_each_possible_cpu(cpu) + iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu)); + cookie->percpu_fq = queue; + return 0; } /* sysfs updates are serialised by the mutex of the group owning @domain */ int iommu_dma_init_fq(struct iommu_domain *domain) { struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_fq __percpu *queue; - int i, cpu; + int rc; if (cookie->fq_domain) return 0; @@ -251,26 +332,16 @@ int iommu_dma_init_fq(struct iommu_domain *domain) atomic64_set(&cookie->fq_flush_start_cnt, 0); atomic64_set(&cookie->fq_flush_finish_cnt, 0); - queue = alloc_percpu(struct iova_fq); - if (!queue) { + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + rc = iommu_dma_init_fq_single(cookie); + else + rc = iommu_dma_init_fq_percpu(cookie); + + if (rc) { pr_warn("iova flush queue initialization failed\n"); return -ENOMEM; } - for_each_possible_cpu(cpu) { - struct iova_fq *fq = per_cpu_ptr(queue, cpu); - - fq->head = 0; - fq->tail = 0; - - spin_lock_init(&fq->lock); - - for (i = 0; i < IOVA_FQ_SIZE; i++) - INIT_LIST_HEAD(&fq->entries[i].freelist); - } - - cookie->fq = queue; - timer_setup(&cookie->fq_timer, fq_flush_timeout, 0); atomic_set(&cookie->fq_timer_on, 0); /* @@ -554,6 +625,23 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg, return false; } +/** + * iommu_dma_init_options - Initialize dma-iommu options + * @options: The options to be initialized + * @dev: Device the options are set for + * + * This allows tuning dma-iommu specific to device properties + */ +static void iommu_dma_init_options(struct iommu_dma_options *options, + struct device *dev) +{ + /* Shadowing IOTLB flushes do better with a single queue */ + if (dev->iommu->shadow_on_flush) + options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE; + else + options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE; +} + /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ 
-614,6 +702,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, if (ret) goto done_unlock; + iommu_dma_init_options(&cookie->options, dev); + /* If the FQ fails we can simply fall back to strict mode */ if (domain->type == IOMMU_DOMAIN_DMA_FQ && (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain))) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 5b5ba1cd5f00..9a5196f523de 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -463,6 +463,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1) zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1; + if (zdev->tlb_refresh) + dev->iommu->shadow_on_flush = 1; + return &zdev->iommu_dev; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1eb638752781..0c4d8ae985ac 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -424,6 +424,7 @@ struct iommu_fault_param { * @attach_deferred: the dma domain attachment is deferred * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs * @require_direct: device requires IOMMU_RESV_DIRECT regions + * @shadow_on_flush: IOTLB flushes are used to sync shadow tables * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -439,6 +440,7 @@ struct dev_iommu { u32 attach_deferred:1; u32 pci_32bit_workaround:1; u32 require_direct:1; + u32 shadow_on_flush:1; }; int iommu_device_register(struct iommu_device *iommu, -- cgit v1.2.3 From 0d293714ac32650bfb669ceadf7cc2fad8161401 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 21 Sep 2023 15:10:27 +0300 Subject: RDMA/mlx5: Send events from IB driver about device affiliation state Send blocking events from IB driver whenever the device is done being affiliated or if it is removed from an affiliation. 
This is useful since now the EN driver can register to those event and know when a device is affiliated or not. Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/a7491c3e483cfd8d962f5f75b9a25f253043384a.1695296682.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 17 +++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++++ include/linux/mlx5/device.h | 2 ++ include/linux/mlx5/driver.h | 2 ++ 4 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aed5cdea50e6..530d88784e41 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -3175,6 +3176,13 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_core_mp_event_replay(ibdev->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + NULL); + mlx5_core_mp_event_replay(mpi->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + NULL); + mlx5_ib_cleanup_cong_debugfs(ibdev, port_num); spin_lock(&port->mp.mpi_lock); @@ -3226,6 +3234,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; + u64 key; int err; lockdep_assert_held(&mlx5_ib_multiport_mutex); @@ -3254,6 +3263,14 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, mlx5_ib_init_cong_debugfs(ibdev, port_num); + key = ibdev->ib_dev.index; + mlx5_core_mp_event_replay(mpi->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + &key); + mlx5_core_mp_event_replay(ibdev->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + &key); + return true; unbind: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 15561965d2af..6ca91c0e8a6a 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -361,6 +361,12 @@ void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data) +{ + mlx5_blocking_notifier_call_chain(dev, event, data); +} +EXPORT_SYMBOL(mlx5_core_mp_event_replay); + int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, enum mlx5_cap_mode cap_mode) { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4d5be378fa8c..26333d602a50 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -366,6 +366,8 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_UPLINK_NETDEV, MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3033bbaeac81..5ca4e085d813 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1027,6 +1027,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data); + void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); -- cgit v1.2.3 From ef36ffcb381096ed32c309c342a02bf62939c503 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 21 Sep 2023 15:10:30 +0300 Subject: net/mlx5: Add alias flow table bits Add all the capabilities needed to check for alias object support. 
As well as all the fields or commands needed for its creation and the creation of flow table that is able to jump to an alias object. Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/544c030f2a78c4adf3fe6b64f97a39cc1bbdabb9.1695296682.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 56 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3265bfcb3156..23f9780adb83 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -312,6 +312,7 @@ enum { MLX5_CMD_OP_QUERY_VHCA_STATE = 0xb0d, MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, + MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, MLX5_CMD_OP_MAX }; @@ -1934,6 +1935,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 match_definer_format_supported[0x40]; }; +enum { + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_TO_REMOTE_FLOW_TABLE_MISS = 0x80000, +}; + +enum { + MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE = 0x200, +}; + struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; @@ -1952,7 +1961,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 migration_in_chunks[0x1]; u8 reserved_at_d1[0xf]; - u8 reserved_at_e0[0xc0]; + u8 cross_vhca_object_to_object_supported[0x20]; + + u8 allowed_object_for_other_vhca_access[0x40]; + + u8 reserved_at_140[0x60]; u8 flow_table_type_2_type[0x8]; u8 reserved_at_1a8[0x3]; @@ -6371,6 +6384,28 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_allow_other_vhca_access_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + u8 reserved_at_40[0x50]; + u8 object_type_to_be_accessed[0x10]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_c0[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; +}; + +struct 
mlx5_ifc_allow_other_vhca_access_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + u8 syndrome[0x20]; + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_modify_header_arg_bits { u8 reserved_at_0[0x80]; @@ -6393,6 +6428,24 @@ struct mlx5_ifc_create_match_definer_out_bits { struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; }; +struct mlx5_ifc_alias_context_bits { + u8 vhca_id_to_be_accessed[0x10]; + u8 reserved_at_10[0xd]; + u8 status[0x3]; + u8 object_id_to_be_accessed[0x20]; + u8 reserved_at_40[0x40]; + union { + u8 access_key_raw[0x100]; + u8 access_key[8][0x20]; + }; + u8 metadata[0x80]; +}; + +struct mlx5_ifc_create_alias_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_alias_context_bits alias_ctx; +}; + enum { MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS = 0x0, MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS = 0x1, @@ -11919,6 +11972,7 @@ enum { MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; enum { -- cgit v1.2.3 From d424348b060d87f92cc59d8e6ea9c612c5b708f5 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 28 Sep 2023 19:45:12 +0300 Subject: vdpa/mlx5: Expose descriptor group mkey hw capability Necessary for improved live migration flow. Actual support will be added in a downstream patch. 
Reviewed-by: Gal Pressman Signed-off-by: Dragos Tatulea Link: https://lore.kernel.org/r/20230928164550.980832-3-dtatulea@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fc3db401f8a2..3388007c645f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1231,7 +1231,13 @@ struct mlx5_ifc_virtio_emulation_cap_bits { u8 max_emulated_devices[0x8]; u8 max_num_virtio_queues[0x18]; - u8 reserved_at_a0[0x60]; + u8 reserved_at_a0[0x20]; + + u8 reserved_at_c0[0x13]; + u8 desc_group_mkey_supported[0x1]; + u8 reserved_at_d4[0xc]; + + u8 reserved_at_e0[0x20]; u8 umem_1_buffer_param_a[0x20]; -- cgit v1.2.3 From a60359ea32251399c00d934981f5a6fa25ec917f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Sep 2023 08:45:51 +0200 Subject: usb: renesas_usbhs: remove boilerplate from header file There is a SPDX entry, so we can remove the boilerplate. Signed-off-by: Wolfram Sang Reviewed-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230922064551.4663-1-wsa+renesas@sang-engineering.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/renesas_usbhs.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index d418c55523a7..372898d9eeb0 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -5,16 +5,6 @@ * Copyright (C) 2011 Renesas Solutions Corp. * Copyright (C) 2019 Renesas Electronics Corporation * Kuninori Morimoto - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * */ #ifndef RENESAS_USB_H #define RENESAS_USB_H -- cgit v1.2.3 From a17fae8fc38e91026f116a85c5068668fbf9848a Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Tue, 19 Sep 2023 19:32:39 -0700 Subject: usb: typec: Add Displayport Alternate Mode 2.1 Support Displayport Alternate mode 2.1 requires configuration for additional cable details such as signalling for cable, UHBR13.5 Support, Cable type and DPAM version. These details can be used with mux drivers to configure SOP DP configuration for Displayport Alternate mode 2.1. This change also includes pertinent cable signalling support in displayport alternate mode. Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Signed-off-by: Utkarsh Patel Link: https://lore.kernel.org/r/20230920023243.2494410-2-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 5 ++++- drivers/usb/typec/ucsi/displayport.c | 2 +- drivers/usb/typec/ucsi/ucsi_ccg.c | 4 ++-- include/linux/usb/typec_dp.h | 28 ++++++++++++++++++++++++---- 4 files changed, 31 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 426c88a516e5..f503cb4cd721 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -86,8 +86,11 @@ static int dp_altmode_notify(struct dp_altmode *dp) static int dp_altmode_configure(struct dp_altmode *dp, u8 con) { - u32 conf = DP_CONF_SIGNALING_DP; /* Only DP signaling supported */ u8 pin_assign = 0; + u32 conf; + + /* DP Signalling */ + conf = (dp->data.conf & DP_CONF_SIGNALLING_MASK) >> DP_CONF_SIGNALLING_SHIFT; switch (con) { case DP_STATUS_CON_DISABLED: diff --git a/drivers/usb/typec/ucsi/displayport.c 
b/drivers/usb/typec/ucsi/displayport.c index 73cd5bf35047..d9d3c91125ca 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -315,7 +315,7 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con, struct ucsi_dp *dp; /* We can't rely on the firmware with the capabilities. */ - desc->vdo |= DP_CAP_DP_SIGNALING | DP_CAP_RECEPTACLE; + desc->vdo |= DP_CAP_DP_SIGNALLING(0) | DP_CAP_RECEPTACLE; /* Claiming that we support all pin assignments */ desc->vdo |= all_assignments << 8; diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 607061a37eca..449c125f6f87 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -501,8 +501,8 @@ static void ucsi_ccg_nvidia_altmode(struct ucsi_ccg *uc, case NVIDIA_FTB_DP_OFFSET: if (alt[0].mid == USB_TYPEC_NVIDIA_VLINK_DBG_VDO) alt[0].mid = USB_TYPEC_NVIDIA_VLINK_DP_VDO | - DP_CAP_DP_SIGNALING | DP_CAP_USB | - DP_CONF_SET_PIN_ASSIGN(BIT(DP_PIN_ASSIGN_E)); + DP_CAP_DP_SIGNALLING(0) | DP_CAP_USB | + DP_CONF_SET_PIN_ASSIGN(BIT(DP_PIN_ASSIGN_E)); break; case NVIDIA_FTB_DBG_OFFSET: if (alt[0].mid == USB_TYPEC_NVIDIA_VLINK_DP_VDO) diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index 8d09c2f0a9b8..1f358098522d 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -67,8 +67,10 @@ enum { #define DP_CAP_UFP_D 1 #define DP_CAP_DFP_D 2 #define DP_CAP_DFP_D_AND_UFP_D 3 -#define DP_CAP_DP_SIGNALING BIT(2) /* Always set */ -#define DP_CAP_GEN2 BIT(3) /* Reserved after v1.0b */ +#define DP_CAP_DP_SIGNALLING(_cap_) (((_cap_) & GENMASK(5, 2)) >> 2) +#define DP_CAP_SIGNALLING_HBR3 1 +#define DP_CAP_SIGNALLING_UHBR10 2 +#define DP_CAP_SIGNALLING_UHBR20 3 #define DP_CAP_RECEPTACLE BIT(6) #define DP_CAP_USB BIT(7) #define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) @@ -78,6 +80,13 @@ enum { DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) 
#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) +#define DP_CAP_UHBR_13_5_SUPPORT BIT(26) +#define DP_CAP_CABLE_TYPE(_cap_) (((_cap_) & GENMASK(29, 28)) >> 28) +#define DP_CAP_CABLE_TYPE_PASSIVE 0 +#define DP_CAP_CABLE_TYPE_RE_TIMER 1 +#define DP_CAP_CABLE_TYPE_RE_DRIVER 2 +#define DP_CAP_CABLE_TYPE_OPTICAL 3 +#define DP_CAP_DPAM_VERSION BIT(30) /* DisplayPort Status Update VDO bits */ #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) @@ -97,13 +106,24 @@ enum { #define DP_CONF_CURRENTLY(_conf_) ((_conf_) & 3) #define DP_CONF_UFP_U_AS_DFP_D BIT(0) #define DP_CONF_UFP_U_AS_UFP_D BIT(1) -#define DP_CONF_SIGNALING_DP BIT(2) -#define DP_CONF_SIGNALING_GEN_2 BIT(3) /* Reserved after v1.0b */ +#define DP_CONF_SIGNALLING_MASK GENMASK(5, 2) +#define DP_CONF_SIGNALLING_SHIFT 2 +#define DP_CONF_SIGNALLING_HBR3 1 +#define DP_CONF_SIGNALLING_UHBR10 2 +#define DP_CONF_SIGNALLING_UHBR20 3 #define DP_CONF_PIN_ASSIGNEMENT_SHIFT 8 #define DP_CONF_PIN_ASSIGNEMENT_MASK GENMASK(15, 8) /* Helper for setting/getting the pin assignment value to the configuration */ #define DP_CONF_SET_PIN_ASSIGN(_a_) ((_a_) << 8) #define DP_CONF_GET_PIN_ASSIGN(_conf_) (((_conf_) & GENMASK(15, 8)) >> 8) +#define DP_CONF_UHBR13_5_SUPPORT BIT(26) +#define DP_CONF_CABLE_TYPE_MASK GENMASK(29, 28) +#define DP_CONF_CABLE_TYPE_SHIFT 28 +#define DP_CONF_CABLE_TYPE_PASSIVE 0 +#define DP_CONF_CABLE_TYPE_RE_TIMER 1 +#define DP_CONF_CABLE_TYPE_RE_DRIVER 2 +#define DP_CONF_CABLE_TYPE_OPTICAL 3 +#define DP_CONF_DPAM_VERSION BIT(30) #endif /* __USB_TYPEC_DP_H */ -- cgit v1.2.3 From c365b1e1f40499472433cc8fca3d0ea280ead52a Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Tue, 19 Sep 2023 19:32:40 -0700 Subject: usb: typec: Add Active or Passive cable defination to cable discover mode VDO As per USB Type-C Connector specification v2.2 section F.2.6, BIT25 represents Active or Passive cable. 
Added BIT25 definition to the Thunderbolt 3 cable discover mode VDO. Reviewed-by: Heikki Krogerus Signed-off-by: Utkarsh Patel Link: https://lore.kernel.org/r/20230920023243.2494410-3-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec_tbt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index 63dd44b72e0c..c7a2153bd6f5 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -46,6 +46,7 @@ struct typec_thunderbolt_data { #define TBT_CABLE_OPTICAL BIT(21) #define TBT_CABLE_RETIMER BIT(22) #define TBT_CABLE_LINK_TRAINING BIT(23) +#define TBT_CABLE_ACTIVE_PASSIVE BIT(25) #define TBT_SET_CABLE_SPEED(_s_) (((_s_) & GENMASK(2, 0)) << 16) #define TBT_SET_CABLE_ROUNDED(_g_) (((_g_) & GENMASK(1, 0)) << 19) -- cgit v1.2.3 From f9ee6043283a78c84adf6ef3cef7a085eee4130f Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Tue, 19 Sep 2023 19:32:41 -0700 Subject: usb: pd: Add helper macro to get Type C cable speed Added a helper macro to get the Type C cable speed when provided the cable VDO.
Reviewed-by: Heikki Krogerus Signed-off-by: Utkarsh Patel Link: https://lore.kernel.org/r/20230920023243.2494410-4-utkarsh.h.patel@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd_vdo.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index b057250704e8..3a747938cdab 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -376,6 +376,7 @@ | ((vbm) & 0x3) << 9 | (sbu) << 8 | (sbut) << 7 | ((cur) & 0x3) << 5 \ | (vbt) << 4 | (sopp) << 3 | ((spd) & 0x7)) +#define VDO_TYPEC_CABLE_SPEED(vdo) ((vdo) & 0x7) #define VDO_TYPEC_CABLE_TYPE(vdo) (((vdo) >> 18) & 0x3) /* -- cgit v1.2.3 From 1b8a62937e0b23c41956feec778ca7776a01df48 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 26 Sep 2023 15:25:31 +0200 Subject: ASoC: ti: Convert TWL4030 to use GPIO descriptors The TWL4030 is actually only ever populated from the device tree, so we can just pass the right device and headphone jack GPIO name to snd_soc_jack_add_gpios() and it will pick the right GPIO right from the device tree. The platform data patch is unused (no in-tree users of the pdata method) but these can use GPIO descriptor tables rather than global GPIO numbers if they need this. 
Signed-off-by: Linus Walleij Acked-by: Jarkko Nikula Link: https://lore.kernel.org/r/20230926-descriptors-asoc-ti-v1-3-60cf4f8adbc5@linaro.org Signed-off-by: Mark Brown --- include/linux/platform_data/omap-twl4030.h | 3 --- sound/soc/ti/omap-twl4030.c | 20 ++++++++------------ 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h index 0dd851ea1c72..7fcb55fe21c9 100644 --- a/include/linux/platform_data/omap-twl4030.h +++ b/include/linux/platform_data/omap-twl4030.h @@ -37,9 +37,6 @@ struct omap_tw4030_pdata { bool has_digimic0; bool has_digimic1; u8 has_linein; - - /* Jack detect GPIO or <= 0 if it is not implemented */ - int jack_detect; }; #endif /* _OMAP_TWL4030_H_ */ diff --git a/sound/soc/ti/omap-twl4030.c b/sound/soc/ti/omap-twl4030.c index 950eec44503b..c7055bb424e6 100644 --- a/sound/soc/ti/omap-twl4030.c +++ b/sound/soc/ti/omap-twl4030.c @@ -20,8 +20,6 @@ #include #include #include -#include -#include #include #include @@ -31,7 +29,6 @@ #include "omap-mcbsp.h" struct omap_twl4030 { - int jack_detect; /* board can detect jack events */ struct snd_soc_jack hs_jack; }; @@ -130,7 +127,7 @@ static struct snd_soc_jack_pin hs_jack_pins[] = { /* Headset jack detection gpios */ static struct snd_soc_jack_gpio hs_jack_gpios[] = { { - .name = "hsdet-gpio", + .name = "ti,jack-det", .report = SND_JACK_HEADSET, .debounce_time = 200, }, @@ -151,9 +148,13 @@ static int omap_twl4030_init(struct snd_soc_pcm_runtime *rtd) struct omap_twl4030 *priv = snd_soc_card_get_drvdata(card); int ret = 0; - /* Headset jack detection only if it is supported */ - if (priv->jack_detect > 0) { - hs_jack_gpios[0].gpio = priv->jack_detect; + /* + * This is a bit of a hack, but the GPIO is optional so we + * only want to add the jack detection if the GPIO is there. 
+ */ + if (of_property_present(card->dev->of_node, "ti,jack-det-gpio")) { + hs_jack_gpios[0].gpiod_dev = card->dev; + hs_jack_gpios[0].idx = 0; ret = snd_soc_card_jack_new_pins(rtd->card, "Headset Jack", SND_JACK_HEADSET, @@ -279,9 +280,6 @@ static int omap_twl4030_probe(struct platform_device *pdev) omap_twl4030_dai_links[1].platforms->of_node = dai_node; } - priv->jack_detect = of_get_named_gpio(node, - "ti,jack-det-gpio", 0); - /* Optional: audio routing can be provided */ prop = of_find_property(node, "ti,audio-routing", NULL); if (prop) { @@ -302,8 +300,6 @@ static int omap_twl4030_probe(struct platform_device *pdev) if (!pdata->voice_connected) card->num_links = 1; - - priv->jack_detect = pdata->jack_detect; } else { dev_err(&pdev->dev, "Missing pdata\n"); return -ENODEV; -- cgit v1.2.3 From 52e24f8c0a102ac76649c6b71224fadcc82bd5da Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 11 Sep 2023 14:56:52 +0200 Subject: usb: pci-quirks: handle HAS_IOPORT dependency for AMD quirk In a future patch HAS_IOPORT=n will result in inb()/outb() and friends not being declared. In the pci-quirks case the I/O port accesses are used in the quirks for several AMD south bridges. Add a config option for the AMD quirks to depend on HAS_IOPORT and #ifdef the quirk code.
Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20230911125653.1393895-3-schnelle@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/Kconfig | 10 ++++++++++ drivers/usb/core/hcd-pci.c | 3 +-- drivers/usb/host/pci-quirks.c | 2 ++ drivers/usb/host/pci-quirks.h | 30 ++++++++++++++++++++++-------- include/linux/usb/hcd.h | 17 +++++++++++++++++ 5 files changed, 52 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index 7f33bcc315f2..abf8c6cdea9e 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -91,6 +91,16 @@ config USB_PCI If you have such a device you may say N here and PCI related code will not be built in the USB driver. +config USB_PCI_AMD + bool "AMD PCI USB host support" + depends on USB_PCI && HAS_IOPORT + default X86 || MACH_LOONGSON64 || PPC_PASEMI + help + Enable workarounds for USB implementation quirks in SB600/SB700/SB800 + and later south bridge implementations. These are common on x86 PCs + with AMD CPUs but rarely used elsewhere, with the exception of a few + powerpc and mips desktop machines. + if USB source "drivers/usb/core/Kconfig" diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 990280688b25..ee3156f49533 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -206,8 +206,7 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct hc_driver *driver) goto free_irq_vectors; } - hcd->amd_resume_bug = (usb_hcd_amd_remote_wakeup_quirk(dev) && - driver->flags & (HCD_USB11 | HCD_USB3)) ? 
1 : 0; + hcd->amd_resume_bug = usb_hcd_amd_resume_bug(dev, driver); if (driver->flags & HCD_MEMORY) { /* EHCI, OHCI */ diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 5e06fad82a22..10813096d00c 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -76,6 +76,7 @@ #define USB_INTEL_USB3_PSSEN 0xD8 #define USB_INTEL_USB3PRM 0xDC +#ifdef CONFIG_USB_PCI_AMD /* AMD quirk use */ #define AB_REG_BAR_LOW 0xe0 #define AB_REG_BAR_HIGH 0xe1 @@ -587,6 +588,7 @@ bool usb_amd_pt_check_port(struct device *device, int port) return !(value & BIT(port_shift)); } EXPORT_SYMBOL_GPL(usb_amd_pt_check_port); +#endif /* CONFIG_USB_PCI_AMD */ static int usb_asmedia_wait_write(struct pci_dev *pdev) { diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index cde2263a9d2e..a5230b0b9e91 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -2,7 +2,7 @@ #ifndef __LINUX_USB_PCI_QUIRKS_H #define __LINUX_USB_PCI_QUIRKS_H -#ifdef CONFIG_USB_PCI +#ifdef CONFIG_USB_PCI_AMD int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *pdev); bool usb_amd_hang_symptom_quirk(void); bool usb_amd_prefetch_quirk(void); @@ -12,23 +12,37 @@ void usb_amd_quirk_pll_disable(void); void usb_amd_quirk_pll_enable(void); void sb800_prefetch(struct device *dev, int on); bool usb_amd_pt_check_port(struct device *device, int port); - -void uhci_reset_hc(struct pci_dev *pdev, unsigned long base); -int uhci_check_and_reset_hc(struct pci_dev *pdev, unsigned long base); -void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev); -void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev); -void usb_disable_xhci_ports(struct pci_dev *xhci_pdev); #else -struct pci_dev; +static inline bool usb_amd_hang_symptom_quirk(void) +{ + return false; +}; +static inline bool usb_amd_prefetch_quirk(void) +{ + return false; +} static inline void usb_amd_quirk_pll_disable(void) {} static inline void usb_amd_quirk_pll_enable(void) {} static 
inline void usb_amd_dev_put(void) {} +static inline bool usb_amd_quirk_pll_check(void) +{ + return false; +} static inline void sb800_prefetch(struct device *dev, int on) {} static inline bool usb_amd_pt_check_port(struct device *device, int port) { return false; } +#endif /* CONFIG_USB_PCI_AMD */ +#ifdef CONFIG_USB_PCI +void uhci_reset_hc(struct pci_dev *pdev, unsigned long base); +int uhci_check_and_reset_hc(struct pci_dev *pdev, unsigned long base); +void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev); +void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev); +void usb_disable_xhci_ports(struct pci_dev *xhci_pdev); +#else +struct pci_dev; static inline void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev) {} static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {} #endif /* CONFIG_USB_PCI */ diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 61d4f0b793dc..00724b4f6e12 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -484,8 +484,25 @@ extern int usb_hcd_pci_probe(struct pci_dev *dev, extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); +#ifdef CONFIG_USB_PCI_AMD extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev); +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + if (!usb_hcd_amd_remote_wakeup_quirk(dev)) + return false; + if (driver->flags & (HCD_USB11 | HCD_USB3)) + return true; + return false; +} +#else /* CONFIG_USB_PCI_AMD */ +static inline bool usb_hcd_amd_resume_bug(struct pci_dev *dev, + const struct hc_driver *driver) +{ + return false; +} +#endif extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif /* CONFIG_USB_PCI */ -- cgit v1.2.3 From 568441b7d45fc7198a89b522d721428f2005c356 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 2 Oct 2023 17:22:40 +0300 Subject: usb: pd: Exposing the Peak Current value of Fixed Supplies to user space Exposing the value of 
the field as is. The Peak Current value has to be interpreted as described in Table 6-10 (Fixed Power Source Peak Current Capability) of the USB Power Delivery Specification, but that interpretation will be done in user space, not in kernel. Suggested-by: Douglas Gilbert Reviewed-by: Guenter Roeck Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231002142240.2641962-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-usb_power_delivery | 7 +++++++ drivers/usb/typec/pd.c | 10 ++++------ include/linux/usb/pd.h | 1 + 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-usb_power_delivery b/Documentation/ABI/testing/sysfs-class-usb_power_delivery index 1bf9d1d7902c..61d233c320ea 100644 --- a/Documentation/ABI/testing/sysfs-class-usb_power_delivery +++ b/Documentation/ABI/testing/sysfs-class-usb_power_delivery @@ -124,6 +124,13 @@ Contact: Heikki Krogerus Description: The voltage the supply supports in millivolts. +What: /sys/class/usb_power_delivery/.../source-capabilities/:fixed_supply/peak_current +Date: October 2023 +Contact: Heikki Krogerus +Description: + This file shows the value of the Fixed Power Source Peak Current + Capability field. + What: /sys/class/usb_power_delivery/.../source-capabilities/:fixed_supply/maximum_current Date: May 2022 Contact: Heikki Krogerus diff --git a/drivers/usb/typec/pd.c b/drivers/usb/typec/pd.c index 8cc66e4467c4..85d015cdbe1f 100644 --- a/drivers/usb/typec/pd.c +++ b/drivers/usb/typec/pd.c @@ -83,14 +83,12 @@ unchunked_extended_messages_supported_show(struct device *dev, } static DEVICE_ATTR_RO(unchunked_extended_messages_supported); -/* - * REVISIT: Peak Current requires access also to the RDO. static ssize_t peak_current_show(struct device *dev, struct device_attribute *attr, char *buf) { - ... 
+ return sysfs_emit(buf, "%u\n", (to_pdo(dev)->pdo >> PDO_FIXED_PEAK_CURR_SHIFT) & 3); } -*/ +static DEVICE_ATTR_RO(peak_current); static ssize_t fast_role_swap_current_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -135,7 +133,7 @@ static struct attribute *source_fixed_supply_attrs[] = { &dev_attr_usb_communication_capable.attr, &dev_attr_dual_role_data.attr, &dev_attr_unchunked_extended_messages_supported.attr, - /*&dev_attr_peak_current.attr,*/ + &dev_attr_peak_current.attr, &dev_attr_voltage.attr, &maximum_current_attr.attr, NULL @@ -144,7 +142,7 @@ static struct attribute *source_fixed_supply_attrs[] = { static umode_t fixed_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n) { if (to_pdo(kobj_to_dev(kobj))->object_position && - /*attr != &dev_attr_peak_current.attr &&*/ + attr != &dev_attr_peak_current.attr && attr != &dev_attr_voltage.attr && attr != &maximum_current_attr.attr && attr != &operational_current_attr.attr) diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index c59fb79a42e8..eb626af0e4e7 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -228,6 +228,7 @@ enum pd_pdo_type { #define PDO_FIXED_UNCHUNK_EXT BIT(24) /* Unchunked Extended Message supported (Source) */ #define PDO_FIXED_FRS_CURR_MASK (BIT(24) | BIT(23)) /* FR_Swap Current (Sink) */ #define PDO_FIXED_FRS_CURR_SHIFT 23 +#define PDO_FIXED_PEAK_CURR_SHIFT 20 #define PDO_FIXED_VOLT_SHIFT 10 /* 50mV units */ #define PDO_FIXED_CURR_SHIFT 0 /* 10mA units */ -- cgit v1.2.3 From 3551ff7c5cfff4dc27fdcd14fa286edc08d78088 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 28 Aug 2023 17:43:02 +0200 Subject: usb: gadget: clarify usage of USB_GADGET_DELAYED_STATUS USB_GADGET_DELAYED_STATUS was introduced in commit 1b9ba000177e ("usb: gadget: composite: Allow function drivers to pause control transfers"). 
It was initially intended for the composite framework to allow delaying completing the status stage of a SET_CONFIGURATION request until all functions are ready. Unfortunately, that commit had an unintended side-effect of returning USB_GADGET_DELAYED_STATUS from the ->setup() call of the composite framework gadget driver. As a result of this and the incomplete documentation, some UDC drivers started relying on USB_GADGET_DELAYED_STATUS to decide when to avoid autocompleting the status stage for 0-length control transfers. dwc3 was the first in commit 5bdb1dcc6330 ("usb: dwc3: ep0: handle delayed_status again"). And a number of other UDC drivers followed later, probably relying on the dwc3 behavior as a reference. Unfortunately, this violated the interface between the UDC and the gadget driver for 0-length control transfers: the UDC driver must only proceed with the status stage for a 0-length control transfer once the gadget driver queued a response to EP0. As a result, a few gadget drivers are partially broken when used with a UDC that only delays the status stage for 0-length transfers when USB_GADGET_DELAYED_STATUS is returned from the setup() callback. This includes Raw Gadget and GadgetFS. For FunctionFS, a workaround was added in commit 946ef68ad4e4 ("usb: gadget: ffs: Let setup() return USB_GADGET_DELAYED_STATUS") and commit 4d644abf2569 ("usb: gadget: f_fs: Only return delayed status when len is 0"). The proper solution to this issue would be to contain USB_GADGET_DELAYED_STATUS within the composite framework and make all UDC drivers not complete the status stage for 0-length requests on their own. Unfortunately, there are quite a few UDC drivers that need to get fixed and the required changes for some of them are not trivial. For now, update the comments to clarify that USB_GADGET_DELAYED_STATUS must not be used by the UDC drivers.
The following two commits also add workarounds to Raw Gadget and GadgetFS to make them compatible with the broken UDC drivers until they are fixed. Acked-by: Alan Stern Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/7f0ee06c68c7241c844cd50f8565fdd5ead79b1b.1693237258.git.andreyknvl@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 8 ++++++++ include/linux/usb/gadget.h | 9 +++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 6014340ba980..af3cd2aae4bc 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -35,6 +35,14 @@ * are ready. The control transfer will then be kept from completing till * all the function drivers that requested for USB_GADGET_DELAYED_STAUS * invoke usb_composite_setup_continue(). + * + * NOTE: USB_GADGET_DELAYED_STATUS must not be used in UDC drivers: they + * must delay completing the status stage for 0-length control transfers + * regardless of the whether USB_GADGET_DELAYED_STATUS is returned from + * the gadget driver's setup() callback. + * Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS, + * which is a bug. These drivers must be fixed and USB_GADGET_DELAYED_STATUS + * must be contained within the composite framework. */ #define USB_GADGET_DELAYED_STATUS 0x7fff /* Impossibly large value */ diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 75bda0783395..6532beb587b1 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -711,6 +711,15 @@ static inline int usb_gadget_check_config(struct usb_gadget *gadget) * get_interface. Setting a configuration (or interface) is where * endpoints should be activated or (config 0) shut down. * + * The gadget driver's setup() callback does not have to queue a response to + * ep0 within the setup() call, the driver can do it after setup() returns. 
+ * The UDC driver must wait until such a response is queued before proceeding + * with the data/status stages of the control transfer. + * + * NOTE: Currently, a number of UDC drivers rely on USB_GADGET_DELAYED_STATUS + * being returned from the setup() callback, which is a bug. See the comment + * next to USB_GADGET_DELAYED_STATUS for details. + * * (Note that only the default control endpoint is supported. Neither * hosts nor devices generally support control traffic except to ep0.) * -- cgit v1.2.3 From 5234193ee2b997e59326b047610a8f3f64a0ce02 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 Sep 2023 13:15:17 -0700 Subject: ceph: Annotate struct ceph_osd_request with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ceph_osd_request. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Ilya Dryomov Cc: Xiubo Li Cc: Jeff Layton Cc: ceph-devel@vger.kernel.org Reviewed-by: "Gustavo A. R. 
Silva" Reviewed-by: Xiubo Li Link: https://lore.kernel.org/r/20230915201517.never.373-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index bf9823956758..b8610e9d2471 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -278,7 +278,7 @@ struct ceph_osd_request { int r_attempts; u32 r_map_dne_bound; - struct ceph_osd_req_op r_ops[]; + struct ceph_osd_req_op r_ops[] __counted_by(r_num_ops); }; struct ceph_request_redirect { -- cgit v1.2.3 From 210d4e9c732fa87b584af72faae96c037d9d7957 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:28:44 -0700 Subject: ipv4/igmp: Annotate struct ip_sf_socklist with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ip_sf_socklist. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Martin KaFai Lau Cc: Alexei Starovoitov Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/r/20230922172858.3822653-2-keescook@chromium.org Signed-off-by: Jakub Kicinski --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index ebf4349a53af..5171231f70a8 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -39,7 +39,7 @@ struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; struct rcu_head rcu; - __be32 sl_addr[]; + __be32 sl_addr[] __counted_by(sl_max); }; #define IP_SFBLOCK 10 /* allocate this many at once */ -- cgit v1.2.3 From b7768e67af9a5b6d6101cbfc146969fedf8df4be Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 28 Sep 2023 16:55:33 +0800 Subject: regulator: mt6358: Add missing regulators for MT6366 When support for the MT6366 PMIC regulators was added, it was assumed that it had the same functionality as MT6358. In reality there are differences. A few regulators have different ranges, or were renamed and repurposed, or removed altogether. Add the 3 regulators that were missing from the original submission. These are added for completeness. VSRAM_CORE is not used in existing projects. VM18 and VMDDR feed DRAM related consumers, and are not used in-kernel. 
Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Acked-by: Lee Jones Link: https://lore.kernel.org/r/20230928085537.3246669-11-wenst@chromium.org Signed-off-by: Mark Brown --- drivers/regulator/mt6358-regulator.c | 20 ++++++++++++++++++++ include/linux/mfd/mt6358/registers.h | 17 +++++++++++++++++ include/linux/regulator/mt6358-regulator.h | 3 +++ 3 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c index 9301491e5e4a..54a4028ded8e 100644 --- a/drivers/regulator/mt6358-regulator.c +++ b/drivers/regulator/mt6358-regulator.c @@ -325,6 +325,20 @@ static const struct linear_range vldo28_ranges[] = { REGULATOR_LINEAR_RANGE(3000000, 0, 10, 10000), }; +static const unsigned int mt6366_vmddr_selectors[] = { 0, 1, 2, 3, 4, 5, 6, 7, 9, 12 }; +static const struct linear_range mt6366_vmddr_ranges[] = { + REGULATOR_LINEAR_RANGE(600000, 0, 10, 10000), + REGULATOR_LINEAR_RANGE(700000, 0, 10, 10000), + REGULATOR_LINEAR_RANGE(800000, 0, 10, 10000), + REGULATOR_LINEAR_RANGE(900000, 0, 10, 10000), + REGULATOR_LINEAR_RANGE(1000000, 0, 10, 10000), + REGULATOR_LINEAR_RANGE(1100000, 0, 10, 10000), + REGULATOR_LINEAR_RANGE(1200000, 0, 10, 10000), + REGULATOR_LINEAR_RANGE(1300000, 0, 10, 10000), + REGULATOR_LINEAR_RANGE(1500000, 0, 10, 10000), + REGULATOR_LINEAR_RANGE(1800000, 0, 10, 10000), +}; + static const unsigned int mt6366_vcn18_vm18_selectors[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; static const struct linear_range mt6366_vcn18_vm18_ranges[] = { @@ -602,6 +616,10 @@ static const struct mt6358_regulator_info mt6366_regulators[] = { MT6358_LDO_VSIM2_CON0, 0, MT6358_VSIM2_ANA_CON0, 0xf00), MT6366_LDO("vcn18", VCN18, mt6366_vcn18_vm18, MT6358_LDO_VCN18_CON0, 0, MT6358_VCN18_ANA_CON0, 0xf00), + MT6366_LDO("vm18", VM18, mt6366_vcn18_vm18, + MT6358_LDO_VM18_CON0, 0, MT6358_VM18_ANA_CON0, 0xf00), + MT6366_LDO("vmddr", VMDDR, mt6366_vmddr, + 
MT6358_LDO_VMDDR_CON0, 0, MT6358_VMDDR_ANA_CON0, 0xf00), MT6366_LDO1("vsram-proc11", VSRAM_PROC11, 500000, 1293750, 6250, MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON0, 0x7f), MT6366_LDO1("vsram-others", VSRAM_OTHERS, 500000, 1293750, 6250, @@ -610,6 +628,8 @@ static const struct mt6358_regulator_info mt6366_regulators[] = { MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON3, 0x7f), MT6366_LDO1("vsram-proc12", VSRAM_PROC12, 500000, 1293750, 6250, MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON1, 0x7f), + MT6366_LDO1("vsram-core", VSRAM_CORE, 500000, 1293750, 6250, + MT6358_LDO_VSRAM_CORE_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON5, 0x7f), }; static int mt6358_sync_vcn33_setting(struct device *dev) diff --git a/include/linux/mfd/mt6358/registers.h b/include/linux/mfd/mt6358/registers.h index 5ea2590be710..d83e87298ac4 100644 --- a/include/linux/mfd/mt6358/registers.h +++ b/include/linux/mfd/mt6358/registers.h @@ -294,4 +294,21 @@ #define MT6358_AUD_TOP_INT_CON0 0x2228 #define MT6358_AUD_TOP_INT_STATUS0 0x2234 +/* + * MT6366 has no VCAM*, but has other regulators in its place. The names + * keep the MT6358 prefix for ease of use in the regulator driver. 
+ */ +#define MT6358_LDO_VSRAM_CON5 0x1bf8 +#define MT6358_LDO_VM18_CON0 MT6358_LDO_VCAMA1_CON0 +#define MT6358_LDO_VM18_CON1 MT6358_LDO_VCAMA1_CON1 +#define MT6358_LDO_VM18_CON2 MT6358_LDO_VCAMA1_CON2 +#define MT6358_LDO_VMDDR_CON0 MT6358_LDO_VCAMA2_CON0 +#define MT6358_LDO_VMDDR_CON1 MT6358_LDO_VCAMA2_CON1 +#define MT6358_LDO_VMDDR_CON2 MT6358_LDO_VCAMA2_CON2 +#define MT6358_LDO_VSRAM_CORE_CON0 MT6358_LDO_VCAMD_CON0 +#define MT6358_LDO_VSRAM_CORE_DBG0 0x1cb6 +#define MT6358_LDO_VSRAM_CORE_DBG1 0x1cb8 +#define MT6358_VM18_ANA_CON0 MT6358_VCAMA1_ANA_CON0 +#define MT6358_VMDDR_ANA_CON0 MT6358_VCAMD_ANA_CON0 + #endif /* __MFD_MT6358_REGISTERS_H__ */ diff --git a/include/linux/regulator/mt6358-regulator.h b/include/linux/regulator/mt6358-regulator.h index c71a6a9fce7a..562386f9b80e 100644 --- a/include/linux/regulator/mt6358-regulator.h +++ b/include/linux/regulator/mt6358-regulator.h @@ -86,6 +86,9 @@ enum { MT6366_ID_VMC, MT6366_ID_VAUD28, MT6366_ID_VSIM2, + MT6366_ID_VM18, + MT6366_ID_VMDDR, + MT6366_ID_VSRAM_CORE, MT6366_ID_RG_MAX, }; -- cgit v1.2.3 From d844fe65f0957024c3e1b0bf2a0615246184d9bc Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 7 Aug 2023 20:03:57 -0700 Subject: sched/headers: Move 'struct sched_param' out of uapi, to work around glibc/musl breakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both glibc and musl define 'struct sched_param' in sched.h, while kernel has it in uapi/linux/sched/types.h, making it cumbersome to use sched_getattr(2) or sched_setattr(2) from userspace. For example, something like this: #include #include struct sched_attr sa; will result in "error: redefinition of ‘struct sched_param’" (note the code doesn't need sched_param at all -- it needs struct sched_attr plus some stuff from sched.h). The situation is, glibc is not going to provide a wrapper for sched_{get,set}attr, thus the need to include linux/sched_types.h directly, which leads to the above problem. 
Thus, the userspace is left with a few sub-par choices when it wants to use e.g. sched_setattr(2), such as maintaining a copy of the struct sched_attr definition, or using some other ugly tricks. OTOH, 'struct sched_param' is well known, defined in POSIX, and it won't ever be changed (as that would break backward compatibility). So, while 'struct sched_param' is indeed part of the kernel uapi, exposing it the way it's done now creates an issue, and hiding it (like this patch does) fixes that issue, hopefully without creating another one: common userspace software relies on libc headers, and as for "special" software (like libc), it looks like glibc and musl do not rely on kernel headers for 'struct sched_param' definition (but let's Cc their mailing lists in case it's otherwise). The alternative to this patch would be to move struct sched_attr to, say, linux/sched.h, or linux/sched/attr.h (the new file). Oh, and here is the previous attempt to fix the issue: https://lore.kernel.org/all/20200528135552.GA87103@google.com/ While I support Linus's arguments, the issue is still here and needs to be fixed. [ mingo: Linus is right, this shouldn't be needed - but on the other hand I agree that this header is not really helpful to user-space as-is. So let's pretend that this is only about sched_attr, and call this commit a workaround for user-space breakage that it in reality is ... Also, remove the Fixes tag.
] Signed-off-by: Kir Kolyshkin Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230808030357.1213829-1-kolyshkin@gmail.com --- include/linux/sched.h | 5 ++++- include/uapi/linux/sched/types.h | 4 ---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index dc37ae787e33..e4235bbfad77 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -63,7 +63,6 @@ struct robust_list_head; struct root_domain; struct rq; struct sched_attr; -struct sched_param; struct seq_file; struct sighand_struct; struct signal_struct; @@ -370,6 +369,10 @@ extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; #endif +struct sched_param { + int sched_priority; +}; + struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h index f2c4589d4dbf..90662385689b 100644 --- a/include/uapi/linux/sched/types.h +++ b/include/uapi/linux/sched/types.h @@ -4,10 +4,6 @@ #include -struct sched_param { - int sched_priority; -}; - #define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ #define SCHED_ATTR_SIZE_VER1 56 /* add: util_{min,max} */ -- cgit v1.2.3 From affccb16c117d188eb09495cbdea149cecbf00b9 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 6 Sep 2023 11:22:32 +0200 Subject: ata: ahci: print the lpm policy on boot The target LPM policy can be set using either a Kconfig or a kernel module parameter. However, if the board type is set to anything but board_ahci_low_power, then the LPM policy will be overridden and set to ATA_LPM_UNKNOWN. Additionally, if the default suspend is suspend to idle, depending on the hardware capabilities of the HBA, ahci_update_initial_lpm_policy() might override the LPM policy to either ATA_LPM_MIN_POWER_WITH_PARTIAL or ATA_LPM_MIN_POWER.
All this means that it is very hard to know which LPM policy a user will actually be using on a given system. In order to make it easier to debug LPM related issues, print the LPM policy on boot. One common LPM related issue is that the device fails to link up. Because of that, we cannot add this print to ata_dev_configure(), as that function is only called after a successful link up. Instead, add the info using ata_port_desc(), with the help of a new ata_port_desc_misc() helper. The port description is printed once per port during boot. Before changes: ata1: SATA max UDMA/133 abar m524288@0xa5780000 port 0xa5780100 irq 170 ata2: SATA max UDMA/133 abar m524288@0xa5780000 port 0xa5780180 irq 170 After changes: ata1: SATA max UDMA/133 abar m524288@0xa5780000 port 0xa5780100 irq 170 lpm-pol 4 ata2: SATA max UDMA/133 abar m524288@0xa5780000 port 0xa5780180 irq 170 lpm-pol 4 Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libahci.c | 2 +- drivers/ata/libata-core.c | 2 +- drivers/ata/libata-sff.c | 10 +++++----- drivers/ata/pata_cs5520.c | 2 +- include/linux/libata.h | 5 +++++ 5 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index f1263364fa97..1a63200ea437 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -2730,7 +2730,7 @@ static int ahci_host_activate_multi_irqs(struct ata_host *host, if (rc) return rc; - ata_port_desc(host->ports[i], "irq %d", irq); + ata_port_desc_misc(host->ports[i], irq); } return ata_host_register(host, sht); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d8cc1e27a125..0f63f805c78c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6026,7 +6026,7 @@ int ata_host_activate(struct ata_host *host, int irq, return rc; for (i = 0; i < host->n_ports; i++) - ata_port_desc(host->ports[i], "irq %d", irq); + ata_port_desc_misc(host->ports[i], irq); rc = ata_host_register(host, 
sht); /* if failed, just free the IRQ and leave ports alone */ diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 8fcc622fcb3d..95a19c4ef2a1 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -2316,7 +2316,7 @@ int ata_pci_sff_activate_host(struct ata_host *host, for (i = 0; i < 2; i++) { if (ata_port_is_dummy(host->ports[i])) continue; - ata_port_desc(host->ports[i], "irq %d", pdev->irq); + ata_port_desc_misc(host->ports[i], pdev->irq); } } else if (legacy_mode) { if (!ata_port_is_dummy(host->ports[0])) { @@ -2326,8 +2326,8 @@ int ata_pci_sff_activate_host(struct ata_host *host, if (rc) goto out; - ata_port_desc(host->ports[0], "irq %d", - ATA_PRIMARY_IRQ(pdev)); + ata_port_desc_misc(host->ports[0], + ATA_PRIMARY_IRQ(pdev)); } if (!ata_port_is_dummy(host->ports[1])) { @@ -2337,8 +2337,8 @@ int ata_pci_sff_activate_host(struct ata_host *host, if (rc) goto out; - ata_port_desc(host->ports[1], "irq %d", - ATA_SECONDARY_IRQ(pdev)); + ata_port_desc_misc(host->ports[1], + ATA_SECONDARY_IRQ(pdev)); } } diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c index 422d42761a1d..38795508c2e9 100644 --- a/drivers/ata/pata_cs5520.c +++ b/drivers/ata/pata_cs5520.c @@ -212,7 +212,7 @@ static int cs5520_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - ata_port_desc(ap, "irq %d", irq[i]); + ata_port_desc_misc(ap, irq[i]); } return ata_host_register(host, &cs5520_sht); diff --git a/include/linux/libata.h b/include/linux/libata.h index 2a7d2af0ed80..54a217868ad0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1561,6 +1561,11 @@ void ata_port_desc(struct ata_port *ap, const char *fmt, ...); extern void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, const char *name); #endif +static inline void ata_port_desc_misc(struct ata_port *ap, int irq) +{ + ata_port_desc(ap, "irq %d", irq); + ata_port_desc(ap, "lpm-pol %d", ap->target_lpm_policy); +} static inline bool 
ata_tag_internal(unsigned int tag) { -- cgit v1.2.3 From 0e19548145d863c48c6562add768a05231d768c9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:52:11 -0700 Subject: ata: libata: Annotate struct ata_cpr_log with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ata_cpr_log. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Damien Le Moal Cc: linux-ide@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Damien Le Moal --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 54a217868ad0..b39891320271 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -656,7 +656,7 @@ struct ata_cpr { struct ata_cpr_log { u8 nr_cpr; - struct ata_cpr cpr[]; + struct ata_cpr cpr[] __counted_by(nr_cpr); }; struct ata_device { -- cgit v1.2.3 From 1b947279798fd51bef46e8241102f5b896add021 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 28 Aug 2023 13:10:28 +0900 Subject: ata: libata: Cleanup inline DMA helper functions Simplify the inline DMA helper functions ata_using_mwdma(), ata_using_udma() and ata_dma_enabled() to directly return as a boolean the result of their test condition. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Chia-Lin Kao (AceLan) Tested-by: Geert Uytterhoeven Reviewed-by: Martin K. 
Petersen --- include/linux/libata.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index b39891320271..1dbb14daccfa 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1886,23 +1886,21 @@ static inline unsigned long ata_deadline(unsigned long from_jiffies, change in future hardware and specs, secondly 0xFF means 'no DMA' but is > UDMA_0. Dyma ddreigiau */ -static inline int ata_using_mwdma(struct ata_device *adev) +static inline bool ata_using_mwdma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_MW_DMA_0 && adev->dma_mode <= XFER_MW_DMA_4) - return 1; - return 0; + return adev->dma_mode >= XFER_MW_DMA_0 && + adev->dma_mode <= XFER_MW_DMA_4; } -static inline int ata_using_udma(struct ata_device *adev) +static inline bool ata_using_udma(struct ata_device *adev) { - if (adev->dma_mode >= XFER_UDMA_0 && adev->dma_mode <= XFER_UDMA_7) - return 1; - return 0; + return adev->dma_mode >= XFER_UDMA_0 && + adev->dma_mode <= XFER_UDMA_7; } -static inline int ata_dma_enabled(struct ata_device *adev) +static inline bool ata_dma_enabled(struct ata_device *adev) { - return (adev->dma_mode == 0xFF ? 0 : 1); + return adev->dma_mode != 0xFF; } /************************************************************************** -- cgit v1.2.3 From c2a36609dab3b7c937ab95bfb8b98e72391f772e Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 19 Sep 2023 10:51:47 +0200 Subject: tty: switch tty_{,un}throttle_safe() to return a bool They return 0 or 1 -- a boolean value, so make it clear that no one should expect negative or other values. 
Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20230919085156.1578-7-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ioctl.c | 18 ++++++++---------- include/linux/tty.h | 4 ++-- 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 7958bf6d27c4..ba60fcf518e0 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -124,17 +124,16 @@ EXPORT_SYMBOL(tty_unthrottle); * conditions when throttling is conditional on factors evaluated prior to * throttling. * - * Returns 0 if tty is throttled (or was already throttled) + * Returns false if tty is throttled (or was already throttled) */ - -int tty_throttle_safe(struct tty_struct *tty) +bool tty_throttle_safe(struct tty_struct *tty) { - int ret = 0; + bool ret = false; mutex_lock(&tty->throttle_mutex); if (!tty_throttled(tty)) { if (tty->flow_change != TTY_THROTTLE_SAFE) - ret = 1; + ret = true; else { set_bit(TTY_THROTTLED, &tty->flags); if (tty->ops->throttle) @@ -155,17 +154,16 @@ int tty_throttle_safe(struct tty_struct *tty) * unthrottle due to race conditions when unthrottling is conditional * on factors evaluated prior to unthrottling. 
* - * Returns 0 if tty is unthrottled (or was already unthrottled) + * Returns false if tty is unthrottled (or was already unthrottled) */ - -int tty_unthrottle_safe(struct tty_struct *tty) +bool tty_unthrottle_safe(struct tty_struct *tty) { - int ret = 0; + bool ret = false; mutex_lock(&tty->throttle_mutex); if (tty_throttled(tty)) { if (tty->flow_change != TTY_UNTHROTTLE_SAFE) - ret = 1; + ret = true; else { clear_bit(TTY_THROTTLED, &tty->flags); if (tty->ops->unthrottle) diff --git a/include/linux/tty.h b/include/linux/tty.h index f002d0f25db7..59d675f345e9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -416,8 +416,8 @@ unsigned int tty_chars_in_buffer(struct tty_struct *tty); unsigned int tty_write_room(struct tty_struct *tty); void tty_driver_flush_buffer(struct tty_struct *tty); void tty_unthrottle(struct tty_struct *tty); -int tty_throttle_safe(struct tty_struct *tty); -int tty_unthrottle_safe(struct tty_struct *tty); +bool tty_throttle_safe(struct tty_struct *tty); +bool tty_unthrottle_safe(struct tty_struct *tty); int tty_do_resize(struct tty_struct *tty, struct winsize *ws); int tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); -- cgit v1.2.3 From 71067fb797e074c468221793fe93ba1b58350f1e Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 19 Sep 2023 10:51:50 +0200 Subject: tty: fix kernel-doc for functions in tty.h tty_kref_get() is already included in Documentation, but is not properly formatted. Fix this. tty_get_baud_rate() is neither properly formatted, nor is included. Fix both. 
Signed-off-by: "Jiri Slaby (SUSE)" Link: https://lore.kernel.org/r/20230919085156.1578-10-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/tty/tty_ioctl.rst | 3 +++ include/linux/tty.h | 21 +++++++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-api/tty/tty_ioctl.rst b/Documentation/driver-api/tty/tty_ioctl.rst index 9b0be79fc15e..3ff1ac5e07f1 100644 --- a/Documentation/driver-api/tty/tty_ioctl.rst +++ b/Documentation/driver-api/tty/tty_ioctl.rst @@ -5,3 +5,6 @@ TTY IOCTL Helpers ================= .. kernel-doc:: drivers/tty/tty_ioctl.c + +.. kernel-doc:: include/linux/tty.h + :identifiers: tty_get_baud_rate diff --git a/include/linux/tty.h b/include/linux/tty.h index 59d675f345e9..4b6340ac2af2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -390,14 +390,12 @@ int vcs_init(void); extern const struct class tty_class; /** - * tty_kref_get - get a tty reference - * @tty: tty device + * tty_kref_get - get a tty reference + * @tty: tty device * - * Return a new reference to a tty object. The caller must hold - * sufficient locks/counts to ensure that their existing reference cannot - * go away + * Returns: a new reference to a tty object. The caller must hold sufficient + * locks/counts to ensure that their existing reference cannot go away */ - static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) @@ -435,14 +433,13 @@ void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud); /** - * tty_get_baud_rate - get tty bit rates - * @tty: tty to query + * tty_get_baud_rate - get tty bit rates + * @tty: tty to query * - * Returns the baud rate as an integer for this terminal. The - * termios lock must be held by the caller and the terminal bit - * flags may be updated. + * Returns: the baud rate as an integer for this terminal. 
The termios lock + * must be held by the caller and the terminal bit flags may be updated. * - * Locking: none + * Locking: none */ static inline speed_t tty_get_baud_rate(struct tty_struct *tty) { -- cgit v1.2.3 From 29bff582b74ed0bdb7e6986482ad9e6799ea4d2f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 26 Sep 2023 21:41:28 -0700 Subject: serial: core: fix kernel-doc for uart_port_unlock_irqrestore() Fix the function name to avoid a kernel-doc warning: include/linux/serial_core.h:666: warning: expecting prototype for uart_port_lock_irqrestore(). Prototype was for uart_port_unlock_irqrestore() instead Fixes: b0af4bcb4946 ("serial: core: Provide port lock wrappers") Signed-off-by: Randy Dunlap Cc: Thomas Gleixner Cc: John Ogness Cc: linux-serial@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Jiri Slaby Reviewed-by: John Ogness Link: https://lore.kernel.org/r/20230927044128.4748-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 3091c62ec37b..89f7b6c63598 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -658,7 +658,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up) } /** - * uart_port_lock_irqrestore - Unlock the UART port, restore interrupts + * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts * @up: Pointer to UART port structure * @flags: The saved interrupt flags for restore */ -- cgit v1.2.3 From 3cd39bc3b11b8d34b7d7c961a35fdfd18b0ebf75 Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Tue, 3 Oct 2023 14:59:53 +0300 Subject: kernel.h: Move ARRAY_SIZE() to a separate header Touching files so used for the kernel, forces 'make' to recompile most of the kernel. Having those definitions in more granular files helps avoid recompiling so much of the kernel. 
Signed-off-by: Alejandro Colomar Reviewed-by: Giovanni Cabiddu Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230817143352.132583-2-lucas.segarra.fernandez@intel.com [andy: reduced to cover only string.h for now] Signed-off-by: Andy Shevchenko --- include/linux/array_size.h | 13 +++++++++++++ include/linux/kernel.h | 7 +------ include/linux/string.h | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 include/linux/array_size.h (limited to 'include/linux') diff --git a/include/linux/array_size.h b/include/linux/array_size.h new file mode 100644 index 000000000000..06d7d83196ca --- /dev/null +++ b/include/linux/array_size.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ARRAY_SIZE_H +#define _LINUX_ARRAY_SIZE_H + +#include + +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +#endif /* _LINUX_ARRAY_SIZE_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cee8fe87e9f4..d9ad21058eed 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -50,12 +51,6 @@ #define READ 0 #define WRITE 1 -/** - * ARRAY_SIZE - get the number of elements in array @arr - * @arr: array to be sized - */ -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) - #define PTR_IF(cond, ptr) ((cond) ? 
(ptr) : NULL) #define u64_to_user_ptr(x) ( \ diff --git a/include/linux/string.h b/include/linux/string.h index dbfc66400050..3c920b6d609b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -2,6 +2,7 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ +#include #include /* for inline */ #include /* for size_t */ #include /* for NULL */ -- cgit v1.2.3 From 82cc14c9930c7613da2fcb41a8d4f90c8b4cb048 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Oct 2023 15:10:11 +0300 Subject: pinctrl: Replace kernel.h by what is actually being used The kernel.h is a mess of unrelated things and we only used it as a proxy to array_size.h, hence switch from former to the latter. While at it, group and sort the headers where it makes sense. Signed-off-by: Andy Shevchenko --- drivers/pinctrl/core.c | 2 +- drivers/pinctrl/pinconf-generic.c | 16 +++++++++------- drivers/pinctrl/pinconf.c | 14 ++++++++------ drivers/pinctrl/pinctrl-utils.c | 6 ++++-- drivers/pinctrl/pinmux.c | 2 +- include/linux/pinctrl/machine.h | 2 +- 6 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index e9dc9638120a..afd0a1040329 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -12,12 +12,12 @@ */ #define pr_fmt(fmt) "pinctrl core: " fmt +#include #include #include #include #include #include -#include #include #include #include diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index 365c4b0ca465..8313cb5f3b3c 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -10,17 +10,19 @@ #define pr_fmt(fmt) "generic pinconfig core: " fmt -#include -#include -#include +#include +#include #include +#include +#include +#include #include -#include #include -#include -#include + #include -#include +#include +#include + #include "core.h" #include "pinconf.h" #include "pinctrl-utils.h" diff --git a/drivers/pinctrl/pinconf.c 
b/drivers/pinctrl/pinconf.c index d9d54065472e..96d853a8f339 100644 --- a/drivers/pinctrl/pinconf.c +++ b/drivers/pinctrl/pinconf.c @@ -9,16 +9,18 @@ */ #define pr_fmt(fmt) "pinconfig core: " fmt -#include -#include -#include -#include -#include +#include #include +#include +#include +#include #include +#include + #include -#include #include +#include + #include "core.h" #include "pinconf.h" diff --git a/drivers/pinctrl/pinctrl-utils.c b/drivers/pinctrl/pinctrl-utils.c index 3580e0fd94ed..40862f7bd6ca 100644 --- a/drivers/pinctrl/pinctrl-utils.c +++ b/drivers/pinctrl/pinctrl-utils.c @@ -6,12 +6,14 @@ * * Author: Laxman Dewangan */ +#include #include #include -#include -#include #include #include + +#include + #include "core.h" #include "pinctrl-utils.h" diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 2a180a5d64a4..3de81d310aee 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -12,12 +12,12 @@ */ #define pr_fmt(fmt) "pinmux core: " fmt +#include #include #include #include #include #include -#include #include #include #include diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index 0639b36f43c5..ee8803f6ad07 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -11,7 +11,7 @@ #ifndef __LINUX_PINCTRL_MACHINE_H #define __LINUX_PINCTRL_MACHINE_H -#include /* ARRAY_SIZE() */ +#include #include -- cgit v1.2.3 From 75cec20345fa8e05a2b5f861fada95ad8e165257 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 8 Sep 2023 16:32:15 +0200 Subject: bpf: Remove xdp_do_flush_map(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xdp_do_flush_map() can be removed because there is no more user in tree. Remove xdp_do_flush_map(). 
Signed-off-by: Sebastian Andrzej Siewior Acked-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20230908143215.869913-3-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 27406aee2d40..e8822bd595f9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1025,12 +1025,6 @@ int xdp_do_redirect_frame(struct net_device *dev, struct bpf_prog *prog); void xdp_do_flush(void); -/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as - * it is no longer only flushing maps. Keep this define for compatibility - * until all drivers are updated - do not use xdp_do_flush_map() in new code! - */ -#define xdp_do_flush_map xdp_do_flush - void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET -- cgit v1.2.3 From ccab211af3c2b90ed792eb5f33707d2f0d59fe50 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Mon, 10 Jul 2023 18:51:24 +0000 Subject: syscalls: Cleanup references to sys_lookup_dcookie() commit 'be65de6b03aa ("fs: Remove dcookies support")' removed the syscall definition for lookup_dcookie. However, syscall tables still point to the old sys_lookup_dcookie() definition. Update syscall tables of all architectures to directly point to sys_ni_syscall() instead. 
Signed-off-by: Sohil Mehta Reviewed-by: Randy Dunlap Acked-by: Namhyung Kim # for perf Acked-by: Russell King (Oracle) Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- arch/alpha/kernel/syscalls/syscall.tbl | 2 +- arch/arm/tools/syscall.tbl | 2 +- arch/arm64/include/asm/unistd32.h | 4 ++-- arch/m68k/kernel/syscalls/syscall.tbl | 2 +- arch/microblaze/kernel/syscalls/syscall.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n64.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sh/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 2 +- include/linux/compat.h | 1 - include/linux/syscalls.h | 1 - include/uapi/asm-generic/unistd.h | 2 +- kernel/sys_ni.c | 2 -- tools/include/uapi/asm-generic/unistd.h | 2 +- tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 2 +- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 +- 25 files changed, 23 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index ad37569d0507..26a32f8e47d5 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -334,7 +334,7 @@ 401 common io_submit sys_io_submit 402 common io_cancel sys_io_cancel 405 common exit_group sys_exit_group -406 common lookup_dcookie sys_lookup_dcookie +406 common lookup_dcookie sys_ni_syscall 407 common epoll_create sys_epoll_create 408 common epoll_ctl sys_epoll_ctl 409 common epoll_wait sys_epoll_wait diff --git a/arch/arm/tools/syscall.tbl 
b/arch/arm/tools/syscall.tbl index c572d6c3dee0..04d8e491d8b4 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -263,7 +263,7 @@ 246 common io_submit sys_io_submit 247 common io_cancel sys_io_cancel 248 common exit_group sys_exit_group -249 common lookup_dcookie sys_lookup_dcookie +249 common lookup_dcookie sys_ni_syscall 250 common epoll_create sys_epoll_create 251 common epoll_ctl sys_epoll_ctl sys_oabi_epoll_ctl 252 common epoll_wait sys_epoll_wait diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 78b68311ec81..9110be82dfaa 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -508,8 +508,8 @@ __SYSCALL(__NR_io_submit, compat_sys_io_submit) __SYSCALL(__NR_io_cancel, sys_io_cancel) #define __NR_exit_group 248 __SYSCALL(__NR_exit_group, sys_exit_group) -#define __NR_lookup_dcookie 249 -__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie) + /* 249 was lookup_dcookie */ +__SYSCALL(249, sys_ni_syscall) #define __NR_epoll_create 250 __SYSCALL(__NR_epoll_create, sys_epoll_create) #define __NR_epoll_ctl 251 diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 259ceb125367..f9d1f2d3f067 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -255,7 +255,7 @@ 245 common io_cancel sys_io_cancel 246 common fadvise64 sys_fadvise64 247 common exit_group sys_exit_group -248 common lookup_dcookie sys_lookup_dcookie +248 common lookup_dcookie sys_ni_syscall 249 common epoll_create sys_epoll_create 250 common epoll_ctl sys_epoll_ctl 251 common epoll_wait sys_epoll_wait diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a3798c2637fd..185fe73d9bbf 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -260,7 +260,7 @@ 250 common fadvise64 sys_fadvise64 # 251 is available for reuse 
(was briefly sys_set_zone_reclaim) 252 common exit_group sys_exit_group -253 common lookup_dcookie sys_lookup_dcookie +253 common lookup_dcookie sys_ni_syscall 254 common epoll_create sys_epoll_create 255 common epoll_ctl sys_epoll_ctl 256 common epoll_wait sys_epoll_wait diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 152034b8e0a0..08f33e7c2896 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -214,7 +214,7 @@ 203 n32 io_submit compat_sys_io_submit 204 n32 io_cancel sys_io_cancel 205 n32 exit_group sys_exit_group -206 n32 lookup_dcookie sys_lookup_dcookie +206 n32 lookup_dcookie sys_ni_syscall 207 n32 epoll_create sys_epoll_create 208 n32 epoll_ctl sys_epoll_ctl 209 n32 epoll_wait sys_epoll_wait diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index cb5e757f6621..80be0e98ea0c 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -214,7 +214,7 @@ 203 n64 io_submit sys_io_submit 204 n64 io_cancel sys_io_cancel 205 n64 exit_group sys_exit_group -206 n64 lookup_dcookie sys_lookup_dcookie +206 n64 lookup_dcookie sys_ni_syscall 207 n64 epoll_create sys_epoll_create 208 n64 epoll_ctl sys_epoll_ctl 209 n64 epoll_wait sys_epoll_wait diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 1a646813afdc..310c7e839b69 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -258,7 +258,7 @@ 244 o32 io_submit sys_io_submit compat_sys_io_submit 245 o32 io_cancel sys_io_cancel 246 o32 exit_group sys_exit_group -247 o32 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +247 o32 lookup_dcookie sys_ni_syscall 248 o32 epoll_create sys_epoll_create 249 o32 epoll_ctl sys_epoll_ctl 250 o32 epoll_wait sys_epoll_wait diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 
b/arch/parisc/kernel/syscalls/syscall.tbl index e97c175b56f9..5410ff9456ae 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -245,7 +245,7 @@ # 220 was alloc_hugepages # 221 was free_hugepages 222 common exit_group sys_exit_group -223 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +223 common lookup_dcookie sys_ni_syscall 224 common epoll_create sys_epoll_create 225 common epoll_ctl sys_epoll_ctl 226 common epoll_wait sys_epoll_wait diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 20e50586e8a2..e1412519b4ad 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -294,7 +294,7 @@ 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group -235 nospu lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create 237 common epoll_ctl sys_epoll_ctl 238 common epoll_wait sys_epoll_wait diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 0122cc156952..cc0bc144b661 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -100,7 +100,7 @@ 106 common stat sys_newstat compat_sys_newstat 107 common lstat sys_newlstat compat_sys_newlstat 108 common fstat sys_newfstat compat_sys_newfstat -110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +110 common lookup_dcookie - - 111 common vhangup sys_vhangup sys_vhangup 112 common idle - - 114 common wait4 sys_wait4 compat_sys_wait4 diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index e90d585c4d3e..17ca58976849 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -260,7 +260,7 @@ 250 common fadvise64 sys_fadvise64 # 251 is unused 252 
common exit_group sys_exit_group -253 common lookup_dcookie sys_lookup_dcookie +253 common lookup_dcookie sys_ni_syscall 254 common epoll_create sys_epoll_create 255 common epoll_ctl sys_epoll_ctl 256 common epoll_wait sys_epoll_wait diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4ed06c71c43f..3f72970cf983 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -249,7 +249,7 @@ 205 common readahead sys_readahead compat_sys_readahead 206 common socketcall sys_socketcall sys32_socketcall 207 common syslog sys_syslog -208 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +208 common lookup_dcookie sys_ni_syscall 209 common fadvise64 sys_fadvise64 compat_sys_fadvise64 210 common fadvise64_64 sys_fadvise64_64 compat_sys_fadvise64_64 211 common tgkill sys_tgkill diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 2d0b1bd866ea..6d0286bbbe27 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -264,7 +264,7 @@ 250 i386 fadvise64 sys_ia32_fadvise64 # 251 is available for reuse (was briefly sys_set_zone_reclaim) 252 i386 exit_group sys_exit_group -253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +253 i386 lookup_dcookie 254 i386 epoll_create sys_epoll_create 255 i386 epoll_ctl sys_epoll_ctl 256 i386 epoll_wait sys_epoll_wait diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 1d6eee30eceb..2a62eaf30d69 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -220,7 +220,7 @@ 209 64 io_submit sys_io_submit 210 common io_cancel sys_io_cancel 211 64 get_thread_area -212 common lookup_dcookie sys_lookup_dcookie +212 common lookup_dcookie 213 common epoll_create sys_epoll_create 214 64 epoll_ctl_old 215 64 epoll_wait_old diff --git 
a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index fc1a4f3c81d9..351521b2e841 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -273,7 +273,7 @@ 252 common timer_getoverrun sys_timer_getoverrun # System 253 common reserved253 sys_ni_syscall -254 common lookup_dcookie sys_lookup_dcookie +254 common lookup_dcookie sys_ni_syscall 255 common available255 sys_ni_syscall 256 common add_key sys_add_key 257 common request_key sys_request_key diff --git a/include/linux/compat.h b/include/linux/compat.h index 1cfa4f0f490a..233f61ec8afc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -581,7 +581,6 @@ asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id, struct io_event __user *events, struct __kernel_timespec __user *timeout, const struct __compat_aio_sigset __user *usig); -asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); asmlinkage long compat_sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22bc6bc147f8..a031613bf966 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -355,7 +355,6 @@ asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); -asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_epoll_create1(int flags); asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index abe087c53b4b..76d946445391 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -71,7 +71,7 @@ 
__SYSCALL(__NR_fremovexattr, sys_fremovexattr) #define __NR_getcwd 17 __SYSCALL(__NR_getcwd, sys_getcwd) #define __NR_lookup_dcookie 18 -__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie) +__SYSCALL(__NR_lookup_dcookie, sys_ni_syscall) #define __NR_eventfd2 19 __SYSCALL(__NR_eventfd2, sys_eventfd2) #define __NR_epoll_create1 20 diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e137c1385c56..d6eaaaf9cf77 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -51,8 +51,6 @@ COND_SYSCALL_COMPAT(io_pgetevents); COND_SYSCALL(io_uring_setup); COND_SYSCALL(io_uring_enter); COND_SYSCALL(io_uring_register); -COND_SYSCALL(lookup_dcookie); -COND_SYSCALL_COMPAT(lookup_dcookie); COND_SYSCALL(eventfd2); COND_SYSCALL(epoll_create1); COND_SYSCALL(epoll_ctl); diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index fd6c1cb585db..7ea3875137e9 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -71,7 +71,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr) #define __NR_getcwd 17 __SYSCALL(__NR_getcwd, sys_getcwd) #define __NR_lookup_dcookie 18 -__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie) +__SYSCALL(__NR_lookup_dcookie, sys_ni_syscall) #define __NR_eventfd2 19 __SYSCALL(__NR_eventfd2, sys_eventfd2) #define __NR_epoll_create1 20 diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index cfda2511badf..478fe63601fc 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -214,7 +214,7 @@ 203 n64 io_submit sys_io_submit 204 n64 io_cancel sys_io_cancel 205 n64 exit_group sys_exit_group -206 n64 lookup_dcookie sys_lookup_dcookie +206 n64 lookup_dcookie sys_ni_syscall 207 n64 epoll_create sys_epoll_create 208 n64 epoll_ctl sys_epoll_ctl 209 n64 epoll_wait sys_epoll_wait diff --git 
a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 8c0b08b7a80e..1b7777e5f9ff 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -294,7 +294,7 @@ 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group -235 nospu lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create 237 common epoll_ctl sys_epoll_ctl 238 common epoll_wait sys_epoll_wait diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index a6935af2235c..11782be77f57 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -100,7 +100,7 @@ 106 common stat sys_newstat compat_sys_newstat 107 common lstat sys_newlstat compat_sys_newlstat 108 common fstat sys_newfstat compat_sys_newfstat -110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +110 common lookup_dcookie - - 111 common vhangup sys_vhangup sys_vhangup 112 common idle - - 114 common wait4 sys_wait4 compat_sys_wait4 diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 227538b0ce80..27f78821453b 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -220,7 +220,7 @@ 209 64 io_submit sys_io_submit 210 common io_cancel sys_io_cancel 211 64 get_thread_area -212 common lookup_dcookie sys_lookup_dcookie +212 common lookup_dcookie 213 common epoll_create sys_epoll_create 214 64 epoll_ctl_old 215 64 epoll_wait_old -- cgit v1.2.3 From 26dd68d293fd1c5ac966fb5dd5f6d89de322a541 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Tue, 12 Sep 2023 07:59:31 -0400 Subject: overflow: add DEFINE_FLEX() for 
on-stack allocs Add DEFINE_FLEX() macro for on-stack allocations of structs with flexible array member. Expose __struct_size() macro outside of fortify-string.h, as it could be used to read size of structs allocated by DEFINE_FLEX(). Move __member_size() alongside it. -Kees Using underlying array for on-stack storage lets us declare known-at-compile-time structures without kzalloc(). Actual usage for ice driver is in following patches of the series. Missing __has_builtin() workaround is moved up to serve also assembly compilation with m68k-linux-gcc, see [1]. Error was (note the .S file extension): In file included from ../include/linux/linkage.h:5, from ../arch/m68k/fpsp040/skeleton.S:40: ../include/linux/compiler_types.h:331:5: warning: "__has_builtin" is not defined, evaluates to 0 [-Wundef] 331 | #if __has_builtin(__builtin_dynamic_object_size) | ^~~~~~~~~~~~~ ../include/linux/compiler_types.h:331:18: error: missing binary operator before token "(" 331 | #if __has_builtin(__builtin_dynamic_object_size) | ^ [1] https://lore.kernel.org/netdev/202308112122.OuF0YZqL-lkp@intel.com/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Przemek Kitszel Link: https://lore.kernel.org/r/20230912115937.1645707-2-przemyslaw.kitszel@intel.com Signed-off-by: Jakub Kicinski --- include/linux/compiler_types.h | 32 +++++++++++++++++++++----------- include/linux/fortify-string.h | 4 ---- include/linux/overflow.h | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index c523c6683789..6f1ca49306d2 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -2,6 +2,15 @@ #ifndef __LINUX_COMPILER_TYPES_H #define __LINUX_COMPILER_TYPES_H +/* + * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. + * In the meantime, to support gcc < 10, we implement __has_builtin + * by hand. 
+ */ +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + #ifndef __ASSEMBLY__ /* @@ -134,17 +143,6 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __preserve_most #endif -/* Builtins */ - -/* - * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21. - * In the meantime, to support gcc < 10, we implement __has_builtin - * by hand. - */ -#ifndef __has_builtin -#define __has_builtin(x) (0) -#endif - /* Compiler specific macros. */ #ifdef __clang__ #include @@ -352,6 +350,18 @@ struct ftrace_likely_data { # define __realloc_size(x, ...) #endif +/* + * When the size of an allocated object is needed, use the best available + * mechanism to find it. (For cases where sizeof() cannot be used.) + */ +#if __has_builtin(__builtin_dynamic_object_size) +#define __struct_size(p) __builtin_dynamic_object_size(p, 0) +#define __member_size(p) __builtin_dynamic_object_size(p, 1) +#else +#define __struct_size(p) __builtin_object_size(p, 0) +#define __member_size(p) __builtin_object_size(p, 1) +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...) 
asm goto(x) #endif diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index da51a83b2829..1e7711185ec6 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -93,13 +93,9 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #if __has_builtin(__builtin_dynamic_object_size) #define POS __pass_dynamic_object_size(1) #define POS0 __pass_dynamic_object_size(0) -#define __struct_size(p) __builtin_dynamic_object_size(p, 0) -#define __member_size(p) __builtin_dynamic_object_size(p, 1) #else #define POS __pass_object_size(1) #define POS0 __pass_object_size(0) -#define __struct_size(p) __builtin_object_size(p, 0) -#define __member_size(p) __builtin_object_size(p, 1) #endif #define __compiletime_lessthan(bounds, length) ( \ diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f9b60313eaea..7b5cf4a5cd19 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -309,4 +309,39 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) #define struct_size_t(type, member, count) \ struct_size((type *)NULL, member, count) +/** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: initializer expression (could be empty for no init). 
+ */ +#define _DEFINE_FLEX(type, name, member, count, initializer) \ + _Static_assert(__builtin_constant_p(count), \ + "onstack flex array members require compile-time const count"); \ + union { \ + u8 bytes[struct_size_t(type, member, count)]; \ + type obj; \ + } name##_u initializer; \ + type *name = (type *)&name##_u + +/** + * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * + * Define a zeroed, on-stack, instance of @type structure with a trailing + * flexible array member. + * Use __struct_size(@name) to get compile-time size of it afterwards. + */ +#define DEFINE_FLEX(type, name, member, count) \ + _DEFINE_FLEX(type, name, member, count, = {}) + #endif /* __LINUX_OVERFLOW_H */ -- cgit v1.2.3 From eeb6d1d6f4ec0e304608f72c3ead584bbb155714 Mon Sep 17 00:00:00 2001 From: GuoHua Cheng Date: Wed, 27 Sep 2023 13:56:08 +0800 Subject: PNP: Clean up coding style in pnp.h Address the following checkpatch complaints: ERROR: "foo * bar" should be "foo *bar" ERROR: space required after that ';' (ctx:VxV) Signed-off-by: GuoHua Cheng [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- include/linux/pnp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index c2a7cfbca713..267fb8a4fb6e 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -291,7 +291,7 @@ static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data) struct pnp_fixup { char id[7]; - void (*quirk_function) (struct pnp_dev * dev); /* fixup function */ + void (*quirk_function) (struct pnp_dev *dev); /* fixup function */ }; /* config parameters */ @@ -419,8 +419,8 @@ struct pnp_protocol { /* protocol specific suspend/resume */ bool (*can_wakeup) (struct pnp_dev *dev); - int (*suspend) (struct pnp_dev * dev, pm_message_t state); - int (*resume) (struct pnp_dev * dev); + int (*suspend) (struct pnp_dev *dev, pm_message_t state); + int (*resume) (struct pnp_dev *dev); /* used by pnp layer only (look but don't touch) */ unsigned char number; /* protocol number */ @@ -492,7 +492,7 @@ static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } -static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0;} +static inline int pnp_range_reserved(resource_size_t start, resource_size_t end) { return 0; } /* protocol helpers */ static inline int pnp_is_active(struct pnp_dev *dev) { return 0; } -- cgit v1.2.3 From 473267a4911f2469722c74ca58087d951072f72a Mon Sep 17 00:00:00 2001 From: Patrick Rohr Date: Mon, 25 Sep 2023 14:47:11 -0700 Subject: net: add sysctl to disable rfc4862 5.5.3e lifetime handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a sysctl to opt-out of RFC4862 section 5.5.3e's valid lifetime derivation mechanism. 
RFC4862 section 5.5.3e prescribes that the valid lifetime in a Router Advertisement PIO shall be ignored if it is less than 2 hours and to reset the lifetime of the corresponding address to 2 hours. An in-progress 6man draft (see draft-ietf-6man-slaac-renum-07 section 4.2) is currently looking to remove this mechanism. While this draft has not been moving particularly quickly for other reasons, there is widespread consensus on section 4.2 which updates RFC4862 section 5.5.3e. Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Cc: Jen Linkova Signed-off-by: Patrick Rohr Reviewed-by: Jiri Pirko Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230925214711.959704-1-prohr@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 11 ++++++++++ include/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 38 ++++++++++++++++++++++------------ 3 files changed, 37 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 5bfa1837968c..f7dfde3b09a9 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2311,6 +2311,17 @@ accept_ra_pinfo - BOOLEAN - enabled if accept_ra is enabled. - disabled if accept_ra is disabled. +ra_honor_pio_life - BOOLEAN + Whether to use RFC4862 Section 5.5.3e to determine the valid + lifetime of an address matching a prefix sent in a Router + Advertisement Prefix Information Option. + + - If enabled, the PIO valid lifetime will always be honored. + - If disabled, RFC4862 section 5.5.3e is used to determine + the valid lifetime of the address. + + Default: 0 (disabled) + accept_ra_rt_info_min_plen - INTEGER Minimum prefix length of Route Information in RA.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e400ff757f13..5e605e384aac 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -82,6 +82,7 @@ struct ipv6_devconf { __u32 ioam6_id_wide; __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; + __u8 ra_honor_pio_life; struct ctl_table_header *sysctl_header; }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0b6ee962c84e..c2d471ad7922 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -236,6 +236,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, + .ra_honor_pio_life = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -297,6 +298,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, + .ra_honor_pio_life = 0, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -2657,22 +2659,23 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. 
+ */ + update_lft = !create && stored_lft; + + if (update_lft && !in6_dev->cnf.ra_honor_pio_life) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); - - /* RFC4862 Section 5.5.3e: - * "Note that the preferred lifetime of the - * corresponding address is always reset to - * the Preferred Lifetime in the received - * Prefix Information option, regardless of - * whether the valid lifetime is also reset or - * ignored." - * - * So we should always update prefered_lft here. - */ - update_lft = 1; } if (update_lft) { @@ -6846,6 +6849,15 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "ra_honor_pio_life", + .data = &ipv6_devconf.ra_honor_pio_life, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #ifdef CONFIG_IPV6_ROUTER_PREF { .procname = "accept_ra_rtr_pref", -- cgit v1.2.3 From 2632bb84d1d53cfd6cf65261064273ded4f759d5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 28 Sep 2023 20:24:34 -0700 Subject: mm: Remove unused vm_brk() With fs/binfmt_elf.c fully refactored to use the new elf_load() helper, there are no more users of vm_brk(), so remove it. 
Cc: Andrew Morton Cc: linux-mm@kvack.org Suggested-by: Eric Biederman Tested-by: Pedro Falcato Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20230929032435.2391507-6-keescook@chromium.org Signed-off-by: Kees Cook --- include/linux/mm.h | 3 +-- mm/mmap.c | 6 ------ mm/nommu.c | 5 ----- 3 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..216dd0c6dcf8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3308,8 +3308,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif -/* These take the mm semaphore themselves */ -extern int __must_check vm_brk(unsigned long, unsigned long); +/* This takes the mm semaphore itself */ extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, diff --git a/mm/mmap.c b/mm/mmap.c index b56a7f0c9f85..34d2337ace59 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3174,12 +3174,6 @@ limits_failed: } EXPORT_SYMBOL(vm_brk_flags); -int vm_brk(unsigned long addr, unsigned long len) -{ - return vm_brk_flags(addr, len, 0); -} -EXPORT_SYMBOL(vm_brk); - /* Release all mmaps. 
*/ void exit_mmap(struct mm_struct *mm) { diff --git a/mm/nommu.c b/mm/nommu.c index 7f9e9e5a0e12..23c43c208f2b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1531,11 +1531,6 @@ void exit_mmap(struct mm_struct *mm) mmap_write_unlock(mm); } -int vm_brk(unsigned long addr, unsigned long len) -{ - return -ENOMEM; -} - /* * expand (or shrink) an existing mapping, potentially moving it at the same * time (controlled by the MREMAP_MAYMOVE flag and available VM space) -- cgit v1.2.3 From 2f3dd39e2b492bec366487a2c9bcbdbd7792f77c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 2 Oct 2023 17:34:28 -0700 Subject: platform/chrome: cros_ec_proto: Mark outdata as const The 'outdata' is copied to the data buffer in cros_ec_cmd() before being sent over to the EC. Mark the argument as const so that callers can pass const pointers to this function and so that callers know the data won't be modified. Cc: Prashant Malani Signed-off-by: Stephen Boyd Acked-by: Prashant Malani Link: https://lore.kernel.org/r/20231003003429.1378109-5-swboyd@chromium.org Signed-off-by: Tzung-Bi Shih --- drivers/platform/chrome/cros_ec_proto.c | 2 +- include/linux/platform_data/cros_ec_proto.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 475a6dd72db6..945b1b15a04c 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -1004,7 +1004,7 @@ EXPORT_SYMBOL_GPL(cros_ec_get_sensor_count); int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, - void *outdata, + const void *outdata, size_t outsize, void *indata, size_t insize) diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4f9f756bc17c..8865e350c12a 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -258,7 +258,7 @@ bool 
cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); -int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata, +int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, const void *outdata, size_t outsize, void *indata, size_t insize); /** -- cgit v1.2.3 From 8874e414fe78718d0f2861fe511cecbd1cd73f4d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 3 Oct 2023 11:49:14 -0700 Subject: platform/x86/intel/tpmi: Add defines to get version information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add defines to get major and minor version from a TPMI version field value. This will avoid code duplication to convert in every feature driver. Also add define for invalid version field. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20231003184916.1860084-2-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/intel_tpmi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index 04d937ad4dc4..ee07393445f9 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -6,6 +6,12 @@ #ifndef _INTEL_TPMI_H_ #define _INTEL_TPMI_H_ +#include + +#define TPMI_VERSION_INVALID 0xff +#define TPMI_MINOR_VERSION(val) FIELD_GET(GENMASK(4, 0), val) +#define TPMI_MAJOR_VERSION(val) FIELD_GET(GENMASK(7, 5), val) + /** * struct intel_tpmi_plat_info - Platform information for a TPMI device instance * @package_id: CPU Package id -- cgit v1.2.3 From 968118fcf0546ef74cf306bf8f8c1e06efff10e3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 12 Sep 2023 10:44:52 +0200 Subject: OMAP/gpio: drop MPUIO static base The OMAP GPIO driver hardcodes the MPUIO chip base, but there is no point: we have already moved all consumers over to using
descriptor look-ups. Drop the MPUIO GPIO base and use dynamic assignment. Root out the unused instances of the OMAP_MPUIO() macro and delete the unused OMAP_GPIO_IS_MPUIO() macro. Signed-off-by: Linus Walleij Reviewed-by: Tony Lindgren Tested-by: Janusz Krzysztofik Signed-off-by: Bartosz Golaszewski --- arch/arm/mach-omap1/board-palmte.c | 5 ----- drivers/gpio/gpio-omap.c | 3 +-- include/linux/platform_data/gpio-omap.h | 3 --- 3 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap1/board-palmte.c b/arch/arm/mach-omap1/board-palmte.c index 7e061d671fde..c917cb2c6e17 100644 --- a/arch/arm/mach-omap1/board-palmte.c +++ b/arch/arm/mach-omap1/board-palmte.c @@ -51,11 +51,6 @@ #define PALMTE_HDQ_GPIO 11 #define PALMTE_HEADPHONES_GPIO 14 #define PALMTE_SPEAKER_GPIO 15 -#define PALMTE_DC_GPIO OMAP_MPUIO(2) -#define PALMTE_MMC_SWITCH_GPIO OMAP_MPUIO(4) -#define PALMTE_MMC1_GPIO OMAP_MPUIO(6) -#define PALMTE_MMC2_GPIO OMAP_MPUIO(7) -#define PALMTE_MMC3_GPIO OMAP_MPUIO(11) static const unsigned int palmte_keymap[] = { KEY(0, 0, KEY_F1), /* Calendar */ diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 8889755e2d03..76d5d87e9681 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1048,15 +1048,14 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct device *pm_dev) bank->chip.label = "mpuio"; if (bank->regs->wkup_en) bank->chip.parent = &omap_mpuio_device.dev; - bank->chip.base = OMAP_MPUIO(0); } else { label = devm_kasprintf(bank->chip.parent, GFP_KERNEL, "gpio-%d-%d", gpio, gpio + bank->width - 1); if (!label) return -ENOMEM; bank->chip.label = label; - bank->chip.base = -1; } + bank->chip.base = -1; bank->chip.ngpio = bank->width; irq = &bank->chip.irq; diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index f377817ce75c..cdd8cfb424f5 100644 --- a/include/linux/platform_data/gpio-omap.h +++ 
b/include/linux/platform_data/gpio-omap.h @@ -144,9 +144,6 @@ #define OMAP_MAX_GPIO_LINES 192 -#define OMAP_MPUIO(nr) (OMAP_MAX_GPIO_LINES + (nr)) -#define OMAP_GPIO_IS_MPUIO(nr) ((nr) >= OMAP_MAX_GPIO_LINES) - #ifndef __ASSEMBLER__ struct omap_gpio_reg_offs { u16 revision; -- cgit v1.2.3 From 36aa129f221c9070afd8dff03154ab49702a5b1b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:21 +0200 Subject: gpiolib: make gpio_device_get() and gpio_device_put() public In order to start migrating away from accessing struct gpio_chip by users other than their owners, let's first make the reference management functions for the opaque struct gpio_device public in the driver.h header. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko --- drivers/gpio/gpiolib.c | 24 ++++++++++++++++++++++++ drivers/gpio/gpiolib.h | 10 ---------- include/linux/gpio/driver.h | 3 +++ 3 files changed, 27 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7c27a1efc1b0..c9d8f6b57771 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1058,6 +1058,30 @@ static struct gpio_chip *find_chip_by_name(const char *name) return gpiochip_find((void *)name, gpiochip_match_name); } +/** + * gpio_device_get() - Increase the reference count of this GPIO device + * @gdev: GPIO device to increase the refcount for + * + * Returns: + * Pointer to @gdev. + */ +struct gpio_device *gpio_device_get(struct gpio_device *gdev) +{ + return to_gpio_device(get_device(&gdev->dev)); +} +EXPORT_SYMBOL_GPL(gpio_device_get); + +/** + * gpio_device_put() - Decrease the reference count of this GPIO device and + * possibly free all resources associated with it. 
+ * @gdev: GPIO device to decrease the reference count for + */ +void gpio_device_put(struct gpio_device *gdev) +{ + put_device(&gdev->dev); +} +EXPORT_SYMBOL_GPL(gpio_device_put); + #ifdef CONFIG_GPIOLIB_IRQCHIP /* diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 9bff5c2cf720..3ccacf3c1288 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -86,16 +86,6 @@ static inline struct gpio_device *to_gpio_device(struct device *dev) return container_of(dev, struct gpio_device, dev); } -static inline struct gpio_device *gpio_device_get(struct gpio_device *gdev) -{ - return to_gpio_device(get_device(&gdev->dev)); -} - -static inline void gpio_device_put(struct gpio_device *gdev) -{ - put_device(&gdev->dev); -} - /* gpio suffixes used for ACPI and device tree lookup */ static __maybe_unused const char * const gpio_suffixes[] = { "gpios", "gpio" }; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 8f0859ba7065..a2060dc3344b 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -606,6 +606,9 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_device *gpio_device_get(struct gpio_device *gdev); +void gpio_device_put(struct gpio_device *gdev); + bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); -- cgit v1.2.3 From 9e4555d1e54a18946d7ca363b9fc8ed1fe7dfde4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:22 +0200 Subject: gpiolib: add support for scope-based management to gpio_device As the few users that need to get the reference to the GPIO device often release it right after inspecting its properties, let's add support for the automatic reference release to struct gpio_device. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko --- include/linux/gpio/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index a2060dc3344b..1cedbc3d3200 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -3,6 +3,8 @@ #define __LINUX_GPIO_DRIVER_H #include +#include +#include #include #include #include @@ -609,6 +611,9 @@ struct gpio_chip *gpiochip_find(void *data, struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); +DEFINE_FREE(gpio_device_put, struct gpio_device *, + if (!IS_ERR_OR_NULL(_T)) gpio_device_put(_T)); + bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); -- cgit v1.2.3 From cfe102f63308c8c8e01199a682868a64b83f653e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:23 +0200 Subject: gpiolib: provide gpio_device_find() gpiochip_find() is wrong and its kernel doc is misleading as the function doesn't return a reference to the gpio_chip but just a raw pointer. The chip itself is not guaranteed to stay alive, in fact it can be deleted at any point. Also: other than GPIO drivers themselves, nobody else has any business accessing gpio_chip structs. Provide a new gpio_device_find() function that returns a real reference to the opaque gpio_device structure that is guaranteed to stay alive for as long as there are active users of it.
Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- drivers/gpio/gpiolib.c | 71 +++++++++++++++++++++++++++++++++------------ include/linux/gpio/driver.h | 3 ++ 2 files changed, 56 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index c9d8f6b57771..d5bdf9cebb29 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1014,16 +1014,10 @@ void gpiochip_remove(struct gpio_chip *gc) } EXPORT_SYMBOL_GPL(gpiochip_remove); -/** - * gpiochip_find() - iterator for locating a specific gpio_chip - * @data: data to pass to match function - * @match: Callback function to check gpio_chip +/* + * FIXME: This will be removed soon. * - * Similar to bus_find_device. It returns a reference to a gpio_chip as - * determined by a user supplied @match callback. The callback should return - * 0 if the device doesn't match and non-zero if it does. If the callback is - * non-zero, this function will return to the caller and not iterate over any - * more gpio_chips. + * This function is deprecated, don't use. */ struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *gc, @@ -1031,21 +1025,62 @@ struct gpio_chip *gpiochip_find(void *data, { struct gpio_device *gdev; struct gpio_chip *gc = NULL; - unsigned long flags; - - spin_lock_irqsave(&gpio_lock, flags); - list_for_each_entry(gdev, &gpio_devices, list) - if (gdev->chip && match(gdev->chip, data)) { - gc = gdev->chip; - break; - } - spin_unlock_irqrestore(&gpio_lock, flags); + gdev = gpio_device_find(data, match); + if (gdev) { + gc = gdev->chip; + gpio_device_put(gdev); + } return gc; } EXPORT_SYMBOL_GPL(gpiochip_find); +/** + * gpio_device_find() - find a specific GPIO device + * @data: data to pass to match function + * @match: Callback function to check gpio_chip + * + * Returns: + * New reference to struct gpio_device. + * + * Similar to bus_find_device().
It returns a reference to a gpio_device as + * determined by a user supplied @match callback. The callback should return + * 0 if the device doesn't match and non-zero if it does. If the callback + * returns non-zero, this function will return to the caller and not iterate + * over any more gpio_devices. + * + * The callback takes the GPIO chip structure as argument. During the execution + * of the callback function the chip is protected from being freed. TODO: This + * actually has yet to be implemented. + * + * If the function returns non-NULL, the returned reference must be freed by + * the caller using gpio_device_put(). + */ +struct gpio_device *gpio_device_find(void *data, + int (*match)(struct gpio_chip *gc, + void *data)) +{ + struct gpio_device *gdev; + + /* + * Not yet but in the future the spinlock below will become a mutex. + * Annotate this function before anyone tries to use it in interrupt + * context like it happened with gpiochip_find(). + */ + might_sleep(); + + guard(spinlock_irqsave)(&gpio_lock); + + list_for_each_entry(gdev, &gpio_devices, list) { + if (gdev->chip && match(gdev->chip, data)) + return gpio_device_get(gdev); + } + + return NULL; +} +EXPORT_SYMBOL_GPL(gpio_device_find); + static int gpiochip_match_name(struct gpio_chip *gc, void *data) { const char *name = data; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1cedbc3d3200..6ad1f1a8ef2e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -608,6 +608,9 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, struct gpio_chip *gpiochip_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_device *gpio_device_find(void *data, + int (*match)(struct gpio_chip *gc, void *data)); + struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); -- cgit v1.2.3 From d62fcd9f1897d587f8f9db3f77ccae8797a44b5d Mon Sep 17 00:00:00 2001 From: 
Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:24 +0200 Subject: gpiolib: provide gpio_device_find_by_label() By far the most common way of looking up GPIO devices is using their label. Provide a helper for that to avoid every user implementing their own matching function. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- drivers/gpio/gpiolib.c | 21 +++++++++++++++++++++ include/linux/gpio/driver.h | 1 + 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index d5bdf9cebb29..0a41f6ee8d33 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1081,6 +1082,26 @@ struct gpio_device *gpio_device_find(void *data, } EXPORT_SYMBOL_GPL(gpio_device_find); +static int gpio_chip_match_by_label(struct gpio_chip *gc, void *label) +{ + return gc->label && !strcmp(gc->label, label); +} + +/** + * gpio_device_find_by_label() - wrapper around gpio_device_find() finding the + * GPIO device by its backing chip's label + * @label: Label to lookup + * + * Returns: + * Reference to the GPIO device or NULL. Reference must be released with + * gpio_device_put().
+ */ +struct gpio_device *gpio_device_find_by_label(const char *label) +{ + return gpio_device_find((void *)label, gpio_chip_match_by_label); +} +EXPORT_SYMBOL_GPL(gpio_device_find_by_label); + static int gpiochip_match_name(struct gpio_chip *gc, void *data) { const char *name = data; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 6ad1f1a8ef2e..24996cba6465 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -610,6 +610,7 @@ struct gpio_chip *gpiochip_find(void *data, struct gpio_device *gpio_device_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); +struct gpio_device *gpio_device_find_by_label(const char *label); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); -- cgit v1.2.3 From 93548f8bbbbf5b62dbd37c8f61a037a06666787b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:25 +0200 Subject: gpiolib: provide gpio_device_get_desc() Getting the GPIO descriptor directly from the gpio_chip struct is dangerous as we don't take the reference to the underlying GPIO device. In order to start working towards removing gpiochip_get_desc(), let's provide a safer variant that works with an existing reference to struct gpio_device. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- drivers/gpio/gpiolib.c | 40 +++++++++++++++++++++++++++++++--------- include/linux/gpio/driver.h | 2 ++ 2 files changed, 33 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 0a41f6ee8d33..f4fdf620ca74 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -147,27 +147,49 @@ struct gpio_desc *gpio_to_desc(unsigned gpio) } EXPORT_SYMBOL_GPL(gpio_to_desc); +/* This function is deprecated and will be removed soon, don't use. 
*/ +struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, + unsigned int hwnum) +{ + return gpio_device_get_desc(gc->gpiodev, hwnum); +} +EXPORT_SYMBOL_GPL(gpiochip_get_desc); + /** - * gpiochip_get_desc - get the GPIO descriptor corresponding to the given - * hardware number for this chip - * @gc: GPIO chip + * gpio_device_get_desc() - get the GPIO descriptor corresponding to the given + * hardware number for this GPIO device + * @gdev: GPIO device to get the descriptor from * @hwnum: hardware number of the GPIO for this chip * * Returns: - * A pointer to the GPIO descriptor or ``ERR_PTR(-EINVAL)`` if no GPIO exists - * in the given chip for the specified hardware number. + * A pointer to the GPIO descriptor or %EINVAL if no GPIO exists in the given + * chip for the specified hardware number or %ENODEV if the underlying chip + * already vanished. + * + * The reference count of struct gpio_device is *NOT* increased like when the + * GPIO is being requested for exclusive usage. It's up to the caller to make + * sure the GPIO device will stay alive together with the descriptor returned + * by this function. */ -struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, - unsigned int hwnum) +struct gpio_desc * +gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum) { - struct gpio_device *gdev = gc->gpiodev; + struct gpio_chip *gc; + + /* + * FIXME: This will be locked once we protect gdev->chip everywhere + * with SRCU. 
+ */ + gc = gdev->chip; + if (!gc) + return ERR_PTR(-ENODEV); if (hwnum >= gdev->ngpio) return ERR_PTR(-EINVAL); return &gdev->descs[hwnum]; } -EXPORT_SYMBOL_GPL(gpiochip_get_desc); +EXPORT_SYMBOL_GPL(gpio_device_get_desc); /** * desc_to_gpio - convert a GPIO descriptor to the integer namespace diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 24996cba6465..3fdf3f14bb13 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -770,6 +770,8 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, void gpiochip_free_own_desc(struct gpio_desc *desc); struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); +struct gpio_desc * +gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); #ifdef CONFIG_GPIOLIB -- cgit v1.2.3 From 9b418780844c677669ab474f6f29940ef545c954 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 27 Sep 2023 16:29:26 +0200 Subject: gpiolib: reluctantly provide gpio_device_get_chip() The process of converting all unauthorized users of struct gpio_chip to using dedicated struct gpio_device function will be long so in the meantime we must provide a way of retrieving the pointer to struct gpio_chip from a GPIO device. Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- drivers/gpio/gpiolib.c | 21 +++++++++++++++++++++ include/linux/gpio/driver.h | 2 ++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f4fdf620ca74..6bb8d4a0b1d4 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -220,6 +220,27 @@ struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) } EXPORT_SYMBOL_GPL(gpiod_to_chip); +/** + * gpio_device_get_chip() - Get the gpio_chip implementation of this GPIO device + * @gdev: GPIO device + * + * Returns: + * Address of the GPIO chip backing this device. 
+ * + * Until we can get rid of all non-driver users of struct gpio_chip, we must + * provide a way of retrieving the pointer to it from struct gpio_device. This + * is *NOT* safe as the GPIO API is considered to be hot-unpluggable and the + * chip can dissapear at any moment (unlike reference-counted struct + * gpio_device). + * + * Use at your own risk. + */ +struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev) +{ + return gdev->chip; +} +EXPORT_SYMBOL_GPL(gpio_device_get_chip); + /* dynamic allocation of GPIOs, e.g. on a hotplugged device */ static int gpiochip_find_base(int ngpio) { diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 3fdf3f14bb13..f8ad7f40100c 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -773,6 +773,8 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); struct gpio_desc * gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); +struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev); + #ifdef CONFIG_GPIOLIB /* lock/unlock as IRQ */ -- cgit v1.2.3 From 8e56b063c86569e51eed1c5681ce6361fa97fc7a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 3 Oct 2023 13:17:53 -0400 Subject: netfilter: handle the connecting collision properly in nf_conntrack_proto_sctp In Scenario A and B below, as the delayed INIT_ACK always changes the peer vtag, SCTP ct with the incorrect vtag may cause packet loss. 
Scenario A: INIT_ACK is delayed until the peer receives its own INIT_ACK 192.168.1.2 > 192.168.1.1: [INIT] [init tag: 1328086772] 192.168.1.1 > 192.168.1.2: [INIT] [init tag: 1414468151] 192.168.1.2 > 192.168.1.1: [INIT ACK] [init tag: 1328086772] 192.168.1.1 > 192.168.1.2: [INIT ACK] [init tag: 1650211246] * 192.168.1.2 > 192.168.1.1: [COOKIE ECHO] 192.168.1.1 > 192.168.1.2: [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: [COOKIE ACK] Scenario B: INIT_ACK is delayed until the peer completes its own handshake 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] 192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] * This patch fixes it as below: In SCTP_CID_INIT processing: - clear ct->proto.sctp.init[!dir] if ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]. (Scenario E) - set ct->proto.sctp.init[dir]. In SCTP_CID_INIT_ACK processing: - drop it if !ct->proto.sctp.init[!dir] && ct->proto.sctp.vtag[!dir] && ct->proto.sctp.vtag[!dir] != ih->init_tag. (Scenario B, Scenario C) - drop it if ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && ct->proto.sctp.vtag[!dir] != ih->init_tag. (Scenario A) In SCTP_CID_COOKIE_ACK processing: - clear ct->proto.sctp.init[dir] and ct->proto.sctp.init[!dir]. (Scenario D) Also, it's important to allow the ct state to move forward with cookie_echo and cookie_ack from the opposite dir for the collision scenarios. There are also other Scenarios where it should allow the packet through, addressed by the processing above: Scenario C: new CT is created by INIT_ACK. Scenario D: start INIT on the existing ESTABLISHED ct. Scenario E: start INIT after the old collision on the existing ESTABLISHED ct. 
192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] (both side are stopped, then start new connection again in hours) 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 242308742] Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Xin Long Signed-off-by: Florian Westphal --- include/linux/netfilter/nf_conntrack_sctp.h | 1 + net/netfilter/nf_conntrack_proto_sctp.c | 43 ++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 625f491b95de..fb31312825ae 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -9,6 +9,7 @@ struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; + u8 init[IP_CT_DIR_MAX]; u8 last_dir; u8 flags; }; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index b6bcc8f2f46b..c6bd533983c1 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ +/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, @@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { /* 
shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ +/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, @@ -412,6 +412,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; + } else if (sch->type == SCTP_CID_COOKIE_ACK) { + ct->proto.sctp.init[dir] = 0; + ct->proto.sctp.init[!dir] = 0; } else if (sch->type == SCTP_CID_HEARTBEAT) { if (ct->proto.sctp.vtag[dir] == 0) { pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir); @@ -461,16 +464,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } /* If it is an INIT or an INIT ACK note down the vtag */ - if (sch->type == SCTP_CID_INIT || - sch->type == SCTP_CID_INIT_ACK) { - struct sctp_inithdr _inithdr, *ih; + if (sch->type == SCTP_CID_INIT) { + struct sctp_inithdr _ih, *ih; - ih = skb_header_pointer(skb, offset + sizeof(_sch), - sizeof(_inithdr), &_inithdr); - if (ih == NULL) + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) goto out_unlock; - pr_debug("Setting vtag %x for dir %d\n", - ih->init_tag, !dir); + + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) + ct->proto.sctp.init[!dir] = 0; + ct->proto.sctp.init[dir] = 1; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; /* don't renew timeout on init retransmit so @@ -481,6 +486,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, old_state == SCTP_CONNTRACK_CLOSED && 
nf_ct_is_confirmed(ct)) ignore = true; + } else if (sch->type == SCTP_CID_INIT_ACK) { + struct sctp_inithdr _ih, *ih; + __be32 vtag; + + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) + goto out_unlock; + + vtag = ct->proto.sctp.vtag[!dir]; + if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag) + goto out_unlock; + /* collision */ + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && + vtag != ih->init_tag) + goto out_unlock; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); + ct->proto.sctp.vtag[!dir] = ih->init_tag; } ct->proto.sctp.state = new_state; -- cgit v1.2.3 From 09361abc346102c505657db4f3ae19ed70e1b703 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Wed, 9 Aug 2023 16:00:12 +0800 Subject: dmaengine: Remove unused declaration dma_chan_cleanup() Commit f27c580c3628 ("dmaengine: remove 'bigref' infrastructure") removed the implementation but left declaration in place. Remove it. Signed-off-by: Yue Haibing Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230809080012.22000-1-yuehaibing@huawei.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c3656e590213..3df70d6131c8 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -517,8 +517,6 @@ static inline const char *dma_chan_name(struct dma_chan *chan) return dev_name(&chan->dev->device); } -void dma_chan_cleanup(struct kref *kref); - /** * typedef dma_filter_fn - callback filter for dma_request_channel * @chan: channel to be reviewed -- cgit v1.2.3 From 4785aa8028536c2be656d22c74ec1995b97056f3 Mon Sep 17 00:00:00 2001 From: Oza Pawandeep Date: Tue, 3 Oct 2023 10:33:33 -0700 Subject: cpuidle, ACPI: Evaluate LPI arch_flags for broadcast timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arm® Functional Fixed 
Hardware Specification defines LPI states, which provide an architectural context loss flags field that can be used to describe the context that might be lost when an LPI state is entered. - Core context Lost - General purpose registers. - Floating point and SIMD registers. - System registers, include the System register based - generic timer for the core. - Debug register in the core power domain. - PMU registers in the core power domain. - Trace register in the core power domain. - Trace context loss - GICR - GICD Qualcomm's custom CPUs preserve the architectural state, including keeping the power domain for local timers active. When the core is power gated, the local timers are sufficient to wake the core up without needing the broadcast timer. The patch fixes the evaluation of cpuidle arch_flags, and moves to the broadcast timer only if core context loss is defined in ACPI LPI. Fixes: a36a7fecfe60 ("ACPI / processor_idle: Add support for Low Power Idle(LPI) states") Reviewed-by: Sudeep Holla Acked-by: Rafael J. Wysocki Signed-off-by: Oza Pawandeep Link: https://lore.kernel.org/r/20231003173333.2865323-1-quic_poza@quicinc.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/acpi.h | 19 +++++++++++++++++++ drivers/acpi/processor_idle.c | 3 +-- include/linux/acpi.h | 9 +++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 4d537d56eb84..6792a1f83f2a 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -9,6 +9,7 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H +#include #include #include #include @@ -44,6 +45,24 @@ #define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \ trbe_interrupt) + sizeof(u16)) +/* + * Arm® Functional Fixed Hardware Specification Version 1.2.
+ * Table 2: Arm Architecture context loss flags + */ +#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */ + +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + if (arch_flags & CPUIDLE_CORE_CTXT) + return CPUIDLE_FLAG_TIMER_STOP; + + return 0; +} +#define arch_get_idle_state_flags arch_get_idle_state_flags + +#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */ +#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */ +#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */ /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index dc615ef6550a..3a34a8c425fe 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1217,8 +1217,7 @@ static int acpi_processor_setup_lpi_states(struct acpi_processor *pr) strscpy(state->desc, lpi->desc, CPUIDLE_DESC_LEN); state->exit_latency = lpi->wake_latency; state->target_residency = lpi->min_residency; - if (lpi->arch_flags) - state->flags |= CPUIDLE_FLAG_TIMER_STOP; + state->flags |= arch_get_idle_state_flags(lpi->arch_flags); if (i != 0 && lpi->entry_method == ACPI_CSTATE_FFH) state->flags |= CPUIDLE_FLAG_RCU_IDLE; state->enter = acpi_idle_lpi_enter; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a73246c3c35e..afd94c9b8b8a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1480,6 +1480,15 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +#ifdef CONFIG_ACPI_PROCESSOR_IDLE +#ifndef arch_get_idle_state_flags +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + return 0; +} +#endif +#endif /* CONFIG_ACPI_PROCESSOR_IDLE */ + #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); -- cgit v1.2.3 From c964c1f5ee96e1460606d44f80a47bdacd8fe568 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Sep 2023 22:35:59 +0200 Subject: rcu: Assume rcu_report_dead() is always 
called locally rcu_report_dead() has to be called locally by the CPU that is going to exit the RCU state machine. Passing a cpu argument here is error-prone and leaves the possibility for a racy remote call. Use local access instead. Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- arch/arm64/kernel/smp.c | 2 +- include/linux/rcupdate.h | 2 +- kernel/cpu.c | 2 +- kernel/rcu/tree.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 960b98b43506..8fa646c90c67 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -401,7 +401,7 @@ void __noreturn cpu_die_early(void) /* Mark this CPU absent */ set_cpu_present(cpu, 0); - rcu_report_dead(cpu); + rcu_report_dead(); if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { update_cpu_boot_status(CPU_KILL_ME); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5e5f920ade90..aa351ddcbe8d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -122,7 +122,7 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); -void rcu_report_dead(unsigned int cpu); +void rcu_report_dead(void); void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC diff --git a/kernel/cpu.c b/kernel/cpu.c index 6de7c6bb74ee..076e75fed8bb 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1388,7 +1388,7 @@ void cpuhp_report_idle_dead(void) struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); BUG_ON(st->state != CPUHP_AP_OFFLINE); - rcu_report_dead(smp_processor_id()); + rcu_report_dead(); st->state = CPUHP_AP_IDLE_DEAD; /* * We cannot call complete after rcu_report_dead() so we delegate it diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8c2954502e55..2e1e7eadf2cc 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4560,11 +4560,11 @@ void 
rcu_cpu_starting(unsigned int cpu) * from the outgoing CPU rather than from the cpuhp_step mechanism. * This is because this function must be invoked at a precise location. */ -void rcu_report_dead(unsigned int cpu) +void rcu_report_dead(void) { unsigned long flags; unsigned long mask; - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ /* -- cgit v1.2.3 From bc0c3357601e3ff1b006600530079bd246ef0d82 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 23 Aug 2023 19:05:56 +0200 Subject: mm: remove remnants of SPLIT_RSS_COUNTING The feature got retired in f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter"), but the patch failed to fully clean it up. Link: https://lkml.kernel.org/r/20230823170556.2281747-1-mjguzik@gmail.com Signed-off-by: Mateusz Guzik Acked-by: Shakeel Butt Signed-off-by: Andrew Morton --- fs/exec.c | 2 -- include/linux/mm.h | 8 -------- kernel/exit.c | 4 ---- kernel/fork.c | 4 ---- kernel/kthread.c | 1 - mm/madvise.c | 5 +---- mm/memory.c | 2 -- 7 files changed, 1 insertion(+), 25 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 6518e33ea813..eb039041ad6a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -986,8 +986,6 @@ static int exec_mmap(struct mm_struct *mm) tsk = current; old_mm = current->mm; exec_mm_release(tsk, old_mm); - if (old_mm) - sync_mm_rss(old_mm); ret = down_write_killable(&tsk->signal->exec_update_lock); if (ret) diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..7613150acab9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2628,14 +2628,6 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, *maxrss = hiwater_rss; } -#if defined(SPLIT_RSS_COUNTING) -void sync_mm_rss(struct mm_struct *mm); -#else -static inline void sync_mm_rss(struct mm_struct *mm) -{ -} -#endif - #ifndef CONFIG_ARCH_HAS_PTE_SPECIAL static inline int 
pte_special(pte_t pte) { diff --git a/kernel/exit.c b/kernel/exit.c index edb50b4c9972..3cdbe797008f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -539,7 +539,6 @@ static void exit_mm(void) exit_mm_release(current, mm); if (!mm) return; - sync_mm_rss(mm); mmap_read_lock(mm); mmgrab_lazy_tlb(mm); BUG_ON(mm != current->active_mm); @@ -829,9 +828,6 @@ void __noreturn do_exit(long code) io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ - /* sync mm's RSS info before statistics gathering */ - if (tsk->mm) - sync_mm_rss(tsk->mm); acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { diff --git a/kernel/fork.c b/kernel/fork.c index 3b6d20dfb9a8..1779183a7cb3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2406,10 +2406,6 @@ __latent_entropy struct task_struct *copy_process( p->io_uring = NULL; #endif -#if defined(SPLIT_RSS_COUNTING) - memset(&p->rss_stat, 0, sizeof(p->rss_stat)); -#endif - p->default_timer_slack_ns = current->timer_slack_ns; #ifdef CONFIG_PSI diff --git a/kernel/kthread.c b/kernel/kthread.c index 1eea53050bab..c46128ec0c0a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1469,7 +1469,6 @@ void kthread_unuse_mm(struct mm_struct *mm) * clearing tsk->mm. 
*/ smp_mb__after_spinlock(); - sync_mm_rss(mm); local_irq_disable(); tsk->mm = NULL; membarrier_update_current_mm(NULL); diff --git a/mm/madvise.c b/mm/madvise.c index 4dded5d27e7e..59e8860c86af 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -746,11 +746,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, folio_mark_lazyfree(folio); } - if (nr_swap) { - if (current->mm == mm) - sync_mm_rss(mm); + if (nr_swap) add_mm_counter(mm, MM_SWAPENTS, nr_swap); - } if (start_pte) { arch_leave_lazy_mmu_mode(); pte_unmap_unlock(start_pte, ptl); diff --git a/mm/memory.c b/mm/memory.c index 6c264d2f969c..0739ccb00e61 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -471,8 +471,6 @@ static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) { int i; - if (current->mm == mm) - sync_mm_rss(mm); for (i = 0; i < NR_MM_COUNTERS; i++) if (rss[i]) add_mm_counter(mm, i, rss[i]); -- cgit v1.2.3 From 91e79d22be75fec88ae58d274a7c9e49d6215099 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 23 Aug 2023 00:13:14 +0100 Subject: mm: convert DAX lock/unlock page to lock/unlock folio The one caller of DAX lock/unlock page already calls compound_head(), so use page_folio() instead, then use a folio throughout the DAX code to remove uses of page->mapping and page->index. 
[jane.chu@oracle.com: add comment to mf_generic_kill_procs(), simplify mf_generic_kill_procs:folio initialization] Link: https://lkml.kernel.org/r/20230908222336.186313-1-jane.chu@oracle.com Link: https://lkml.kernel.org/r/20230822231314.349200-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Jane Chu Acked-by: Naoya Horiguchi Cc: Dan Williams Cc: Jane Chu Signed-off-by: Andrew Morton --- fs/dax.c | 24 ++++++++++++------------ include/linux/dax.h | 10 +++++----- mm/memory-failure.c | 29 ++++++++++++++++------------- 3 files changed, 33 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 8fafecbe42b1..3380b43cb6bb 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -412,23 +412,23 @@ static struct page *dax_busy_page(void *entry) return NULL; } -/* - * dax_lock_page - Lock the DAX entry corresponding to a page - * @page: The page whose entry we want to lock +/** + * dax_lock_folio - Lock the DAX entry corresponding to a folio + * @folio: The folio whose entry we want to lock * * Context: Process context. - * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could + * Return: A cookie to pass to dax_unlock_folio() or 0 if the entry could * not be locked.
*/ -dax_entry_t dax_lock_page(struct page *page) +dax_entry_t dax_lock_folio(struct folio *folio) { XA_STATE(xas, NULL, 0); void *entry; - /* Ensure page->mapping isn't freed while we look at it */ + /* Ensure folio->mapping isn't freed while we look at it */ rcu_read_lock(); for (;;) { - struct address_space *mapping = READ_ONCE(page->mapping); + struct address_space *mapping = READ_ONCE(folio->mapping); entry = NULL; if (!mapping || !dax_mapping(mapping)) @@ -447,11 +447,11 @@ dax_entry_t dax_lock_page(struct page *page) xas.xa = &mapping->i_pages; xas_lock_irq(&xas); - if (mapping != page->mapping) { + if (mapping != folio->mapping) { xas_unlock_irq(&xas); continue; } - xas_set(&xas, page->index); + xas_set(&xas, folio->index); entry = xas_load(&xas); if (dax_is_locked(entry)) { rcu_read_unlock(); @@ -467,10 +467,10 @@ dax_entry_t dax_lock_page(struct page *page) return (dax_entry_t)entry; } -void dax_unlock_page(struct page *page, dax_entry_t cookie) +void dax_unlock_folio(struct folio *folio, dax_entry_t cookie) { - struct address_space *mapping = page->mapping; - XA_STATE(xas, &mapping->i_pages, page->index); + struct address_space *mapping = folio->mapping; + XA_STATE(xas, &mapping->i_pages, folio->index); if (S_ISCHR(mapping->host->i_mode)) return; diff --git a/include/linux/dax.h b/include/linux/dax.h index 22cd9902345d..b463502b16e1 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -159,8 +159,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); -dax_entry_t dax_lock_page(struct page *page); -void dax_unlock_page(struct page *page, dax_entry_t cookie); +dax_entry_t dax_lock_folio(struct folio *folio); +void dax_unlock_folio(struct folio *folio, dax_entry_t cookie); dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, unsigned long index, struct page 
**page); void dax_unlock_mapping_entry(struct address_space *mapping, @@ -182,14 +182,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping, return -EOPNOTSUPP; } -static inline dax_entry_t dax_lock_page(struct page *page) +static inline dax_entry_t dax_lock_folio(struct folio *folio) { - if (IS_DAX(page->mapping->host)) + if (IS_DAX(folio->mapping->host)) return ~0UL; return 0; } -static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) +static inline void dax_unlock_folio(struct folio *folio, dax_entry_t cookie) { } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4d6e43c88489..660c21859118 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1713,20 +1713,23 @@ static void unmap_and_kill(struct list_head *to_kill, unsigned long pfn, kill_procs(to_kill, flags & MF_MUST_KILL, false, pfn, flags); } +/* + * Only dev_pagemap pages get here, such as fsdax when the filesystem + * either do not claim or fails to claim a hwpoison event, or devdax. + * The fsdax pages are initialized per base page, and the devdax pages + * could be initialized either as base pages, or as compound pages with + * vmemmap optimization enabled. Devdax is simplistic in its dealing with + * hwpoison, such that, if a subpage of a compound page is poisoned, + * simply mark the compound head page is by far sufficient. + */ static int mf_generic_kill_procs(unsigned long long pfn, int flags, struct dev_pagemap *pgmap) { - struct page *page = pfn_to_page(pfn); + struct folio *folio = pfn_folio(pfn); LIST_HEAD(to_kill); dax_entry_t cookie; int rc = 0; - /* - * Pages instantiated by device-dax (not filesystem-dax) - * may be compound pages. 
- */ - page = compound_head(page); - /* * Prevent the inode from being freed while we are interrogating * the address_space, typically this would be handled by @@ -1734,11 +1737,11 @@ static int mf_generic_kill_procs(unsigned long long pfn, int flags, * also prevents changes to the mapping of this pfn until * poison signaling is complete. */ - cookie = dax_lock_page(page); + cookie = dax_lock_folio(folio); if (!cookie) return -EBUSY; - if (hwpoison_filter(page)) { + if (hwpoison_filter(&folio->page)) { rc = -EOPNOTSUPP; goto unlock; } @@ -1760,7 +1763,7 @@ static int mf_generic_kill_procs(unsigned long long pfn, int flags, * Use this flag as an indication that the dax page has been * remapped UC to prevent speculative consumption of poison. */ - SetPageHWPoison(page); + SetPageHWPoison(&folio->page); /* * Unlike System-RAM there is no possibility to swap in a @@ -1769,11 +1772,11 @@ static int mf_generic_kill_procs(unsigned long long pfn, int flags, * SIGBUS (i.e. MF_MUST_KILL) */ flags |= MF_ACTION_REQUIRED | MF_MUST_KILL; - collect_procs(page, &to_kill, true); + collect_procs(&folio->page, &to_kill, true); - unmap_and_kill(&to_kill, pfn, page->mapping, page->index, flags); + unmap_and_kill(&to_kill, pfn, folio->mapping, folio->index, flags); unlock: - dax_unlock_page(page, cookie); + dax_unlock_folio(folio, cookie); return rc; } -- cgit v1.2.3 From b1e5a3dee255a11cbdd5a0e814829276bd33a793 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 3 Sep 2023 15:13:23 +0000 Subject: mm/mremap: allow moves within the same VMA for stack moves For the stack move happening in shift_arg_pages(), the move is happening within the same VMA which spans the old and new ranges. In case the aligned address happens to fall within that VMA, allow such moves and don't abort the mremap alignment optimization. 
In the regular non-stack mremap case, we cannot allow any such moves as will end up destroying some part of the mapping (either the source of the move, or part of the existing mapping). So just avoid it for stack moves. Link: https://lkml.kernel.org/r/20230903151328.2981432-3-joel@joelfernandes.org Signed-off-by: Joel Fernandes (Google) Reviewed-by: Lorenzo Stoakes Cc: Kalesh Singh Cc: "Kirill A. Shutemov" Cc: Liam R. Howlett Cc: Linus Torvalds Cc: Lokesh Gidra Cc: Michal Hocko Cc: Paul E. McKenney Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- fs/exec.c | 2 +- include/linux/mm.h | 2 +- mm/mremap.c | 33 +++++++++++++++++++-------------- 3 files changed, 21 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index eb039041ad6a..4aa19b24f281 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -713,7 +713,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * process cleanup to remove whatever mess we made. */ if (length != move_page_tables(vma, old_start, - vma, new_start, length, false)) + vma, new_start, length, false, true)) return -ENOMEM; lru_add_drain(); diff --git a/include/linux/mm.h b/include/linux/mm.h index 7613150acab9..21c3d4e8a282 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2480,7 +2480,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, - bool need_rmap_locks); + bool need_rmap_locks, bool for_stack); /* * Flags used by change_protection(). For now we make it a bitmap so diff --git a/mm/mremap.c b/mm/mremap.c index e2b65a17148e..ce8a23ef325a 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -490,12 +490,13 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma, } /* - * A helper to check if a previous mapping exists. 
Required for - * move_page_tables() and realign_addr() to determine if a previous mapping - * exists before we can do realignment optimizations. + * A helper to check if aligning down is OK. The aligned address should fall + * on *no mapping*. For the stack moving down, that's a special move within + * the VMA that is created to span the source and destination of the move, + * so we make an exception for it. */ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_align, - unsigned long mask) + unsigned long mask, bool for_stack) { unsigned long addr_masked = addr_to_align & mask; @@ -504,9 +505,13 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali * of the corresponding VMA, we can't align down or we will destroy part * of the current mapping. */ - if (vma->vm_start != addr_to_align) + if (!for_stack && vma->vm_start != addr_to_align) return false; + /* In the stack case we explicitly permit in-VMA alignment. */ + if (for_stack && addr_masked >= vma->vm_start) + return true; + /* * Make sure the realignment doesn't cause the address to fall on an * existing mapping. @@ -517,7 +522,7 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali /* Opportunistically realign to specified boundary for faster copy. */ static void try_realign_addr(unsigned long *old_addr, struct vm_area_struct *old_vma, unsigned long *new_addr, struct vm_area_struct *new_vma, - unsigned long mask) + unsigned long mask, bool for_stack) { /* Skip if the addresses are already aligned. */ if ((*old_addr & ~mask) == 0) @@ -528,8 +533,8 @@ static void try_realign_addr(unsigned long *old_addr, struct vm_area_struct *old return; /* Ensure realignment doesn't cause overlap with existing mappings. 
*/ - if (!can_align_down(old_vma, *old_addr, mask) || - !can_align_down(new_vma, *new_addr, mask)) + if (!can_align_down(old_vma, *old_addr, mask, for_stack) || + !can_align_down(new_vma, *new_addr, mask, for_stack)) return; *old_addr = *old_addr & mask; @@ -539,7 +544,7 @@ static void try_realign_addr(unsigned long *old_addr, struct vm_area_struct *old unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, - bool need_rmap_locks) + bool need_rmap_locks, bool for_stack) { unsigned long extent, old_end; struct mmu_notifier_range range; @@ -559,9 +564,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma, * If possible, realign addresses to PMD boundary for faster copy. * Only realign if the mremap copying hits a PMD boundary. */ - if ((vma != new_vma) - && (len >= PMD_SIZE - (old_addr & ~PMD_MASK))) - try_realign_addr(&old_addr, vma, &new_addr, new_vma, PMD_MASK); + if (len >= PMD_SIZE - (old_addr & ~PMD_MASK)) + try_realign_addr(&old_addr, vma, &new_addr, new_vma, PMD_MASK, + for_stack); flush_cache_range(vma, old_addr, old_end); mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, @@ -708,7 +713,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, } moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len, - need_rmap_locks); + need_rmap_locks, false); if (moved_len < old_len) { err = -ENOMEM; } else if (vma->vm_ops && vma->vm_ops->mremap) { @@ -722,7 +727,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, * and then proceed to unmap new area instead of old. 
*/ move_page_tables(new_vma, new_addr, vma, old_addr, moved_len, - true); + true, false); vma = new_vma; old_len = new_len; old_addr = new_addr; -- cgit v1.2.3 From d896073fc767ebb40c11a6a9de71c390757ac64b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 7 Sep 2023 02:29:27 +0000 Subject: mm/damon/core: add more comments for nr_accesses The comment on struct damon_region about nr_accesses field looks not sufficient. Many people actually used to ask what nr_accesses mean. There is more detailed explanation of the mechanism on the comment for struct damon_attrs, but it is also ambiguous, as it doesn't specify the name of the counter for aggregating the access check results. Make those more detailed. Link: https://lkml.kernel.org/r/20230907022929.91361-10-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/damon.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index ae2664d1d5f1..266f92b34dd2 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -43,6 +43,10 @@ struct damon_addr_range { * @list: List head for siblings. * @age: Age of this region. * + * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be + * increased for every &damon_attrs->sample_interval if an access to the region + * during the last sampling interval is found. + * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. If two * regions are merged into a new region, both @nr_accesses and @age of the new @@ -472,13 +476,14 @@ struct damon_callback { * regions. * * For each @sample_interval, DAMON checks whether each region is accessed or - * not. It aggregates and keeps the access information (number of accesses to - * each region) for @aggr_interval time. 
DAMON also checks whether the target - * memory regions need update (e.g., by ``mmap()`` calls from the application, - * in case of virtual memory monitoring) and applies the changes for each - * @ops_update_interval. All time intervals are in micro-seconds. - * Please refer to &struct damon_operations and &struct damon_callback for more - * detail. + * not during the last @sample_interval. If such access is found, DAMON + * aggregates the information by increasing &damon_region->nr_accesses for + * @aggr_interval time. For each @aggr_interval, the count is reset. DAMON + * also checks whether the target memory regions need update (e.g., by + * ``mmap()`` calls from the application, in case of virtual memory monitoring) + * and applies the changes for each @ops_update_interval. All time intervals + * are in micro-seconds. Please refer to &struct damon_operations and &struct + * damon_callback for more detail. */ struct damon_attrs { unsigned long sample_interval; -- cgit v1.2.3 From cf0a96bd3ab4d9d8a1c92baf1a822f2ddbca3a34 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 7 Sep 2023 02:29:28 +0000 Subject: mm/damon/core: remove duplicated comment for watermarks-based deactivation The comment for explaining about watermarks-based monitoring part deactivation is duplicated in two paragraphs. Remove one. Link: https://lkml.kernel.org/r/20230907022929.91361-11-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/damon.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 266f92b34dd2..ab3089de1478 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -317,9 +317,6 @@ struct damos_access_pattern { * monitoring context are inactive, DAMON stops monitoring either, and just * repeatedly checks the watermarks. 
* - * If all schemes that registered to a &struct damon_ctx are inactive, DAMON - * stops monitoring and just repeatedly checks the watermarks. - * * Before applying the &action to a memory region, &struct damon_operations * implementation could check pages of the region and skip &action to respect * &filters -- cgit v1.2.3 From 7cd34dd3c9bf1d67ce7d1ab3fe8886c583ae0d9a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 11 Sep 2023 14:21:13 +0300 Subject: efi/unaccepted: do not let /proc/vmcore try to access unaccepted memory Patch series "Do not try to access unaccepted memory", v2. Support for unaccepted memory was added recently, refer commit dcdfdd40fa82 ("mm: Add support for unaccepted memory"), whereby a virtual machine may need to accept memory before it can be used. Plug a few gaps where RAM is exposed without checking if it is unaccepted memory. This patch (of 2): Support for unaccepted memory was added recently, refer commit dcdfdd40fa82 ("mm: Add support for unaccepted memory"), whereby a virtual machine may need to accept memory before it can be used. Do not let /proc/vmcore try to access unaccepted memory because it can cause the guest to fail. For /proc/vmcore, which is read-only, this means a read or mmap of unaccepted memory will return zeros. Link: https://lkml.kernel.org/r/20230911112114.91323-1-adrian.hunter@intel.com Link: https://lkml.kernel.org/r/20230911112114.91323-2-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Cc: Ard Biesheuvel Cc: Baoquan He Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Dave Young Cc: Kirill A. 
Shutemov Cc: Lorenzo Stoakes Cc: Mike Rapoport Cc: Tom Lendacky Cc: Vivek Goyal Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- drivers/firmware/efi/unaccepted_memory.c | 20 ++++++++++++++++++++ include/linux/mm.h | 7 +++++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/efi/unaccepted_memory.c b/drivers/firmware/efi/unaccepted_memory.c index 853f7dc3c21d..79ba576b22e3 100644 --- a/drivers/firmware/efi/unaccepted_memory.c +++ b/drivers/firmware/efi/unaccepted_memory.c @@ -3,6 +3,7 @@ #include #include #include +#include #include /* Protects unaccepted memory bitmap */ @@ -145,3 +146,22 @@ bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end) return ret; } + +#ifdef CONFIG_PROC_VMCORE +static bool unaccepted_memory_vmcore_pfn_is_ram(struct vmcore_cb *cb, + unsigned long pfn) +{ + return !pfn_is_unaccepted_memory(pfn); +} + +static struct vmcore_cb vmcore_cb = { + .pfn_is_ram = unaccepted_memory_vmcore_pfn_is_ram, +}; + +static int __init unaccepted_memory_init_kdump(void) +{ + register_vmcore_cb(&vmcore_cb); + return 0; +} +core_initcall(unaccepted_memory_init_kdump); +#endif /* CONFIG_PROC_VMCORE */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 21c3d4e8a282..31dc25d3f6b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4054,4 +4054,11 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end) #endif +static inline bool pfn_is_unaccepted_memory(unsigned long pfn) +{ + phys_addr_t paddr = pfn << PAGE_SHIFT; + + return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); +} + #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 3ee0aa9f06756e959633ffda37856c6741d948ed Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:25:14 +0800 Subject: mm: move some shrinker-related function declarations to mm/internal.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "cleanups for lockless slab shrink", 
v4. This series is some cleanups for lockless slab shrink. This patch (of 4): The following functions are only used inside the mm subsystem, so it's better to move their declarations to the mm/internal.h file. 1. shrinker_debugfs_add() 2. shrinker_debugfs_detach() 3. shrinker_debugfs_remove() Link: https://lkml.kernel.org/r/20230911092517.64141-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911092517.64141-2-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Christian Brauner Cc: Christian König Cc: Chuck Lever Cc: Daniel Vetter Cc: Darrick J. Wong Cc: Dave Chinner Cc: Greg Kroah-Hartman Cc: Joel Fernandes Cc: Kirill Tkhai Cc: Paul E. McKenney Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Steven Price Cc: Theodore Ts'o Cc: Vlastimil Babka Cc: Daniel Vetter Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Coly Li Cc: Dai Ngo Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Marijn Suijten Cc: "Michael S. 
Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Sean Paul Cc: Song Liu Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 19 ------------------- mm/internal.h | 26 ++++++++++++++++++++++++++ mm/shrinker_debug.c | 2 ++ 3 files changed, 28 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 224293b2dd06..8dc15aa37410 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -106,28 +106,9 @@ extern void free_prealloced_shrinker(struct shrinker *shrinker); extern void synchronize_shrinkers(void); #ifdef CONFIG_SHRINKER_DEBUG -extern int shrinker_debugfs_add(struct shrinker *shrinker); -extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id); -extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id); extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); #else /* CONFIG_SHRINKER_DEBUG */ -static inline int shrinker_debugfs_add(struct shrinker *shrinker) -{ - return 0; -} -static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, - int *debugfs_id) -{ - *debugfs_id = -1; - return NULL; -} -static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, - int debugfs_id) -{ -} static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) 
{ diff --git a/mm/internal.h b/mm/internal.h index 30cf724ddbce..939d1227a527 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1154,4 +1154,30 @@ struct vma_prepare { struct vm_area_struct *remove; struct vm_area_struct *remove2; }; + +/* shrinker related functions */ + +#ifdef CONFIG_SHRINKER_DEBUG +extern int shrinker_debugfs_add(struct shrinker *shrinker); +extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, + int *debugfs_id); +extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, + int debugfs_id); +#else /* CONFIG_SHRINKER_DEBUG */ +static inline int shrinker_debugfs_add(struct shrinker *shrinker) +{ + return 0; +} +static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, + int *debugfs_id) +{ + *debugfs_id = -1; + return NULL; +} +static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, + int debugfs_id) +{ +} +#endif /* CONFIG_SHRINKER_DEBUG */ + #endif /* __MM_INTERNAL_H */ diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index 3ab53fad8876..ee0cddb4530f 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -6,6 +6,8 @@ #include #include +#include "internal.h" + /* defined in vmscan.c */ extern struct rw_semaphore shrinker_rwsem; extern struct list_head shrinker_list; -- cgit v1.2.3 From 0b2f5ea1aa39c0ed34bdadb53faf519e3d84ac4a Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:25:17 +0800 Subject: drm/ttm: introduce pool_shrink_rwsem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, synchronize_shrinkers() is only used by TTM pool. It only requires that no shrinkers run in parallel. After we use RCU+refcount method to implement the lockless slab shrink, we can not use shrinker_rwsem or synchronize_rcu() to guarantee that all shrinker invocations have seen an update before freeing memory. 
So we introduce a new pool_shrink_rwsem to implement a private ttm_pool_synchronize_shrinkers(), so as to achieve the same purpose. Link: https://lkml.kernel.org/r/20230911092517.64141-5-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Reviewed-by: Christian König Acked-by: Daniel Vetter Cc: Christian Brauner Cc: Chuck Lever Cc: Daniel Vetter Cc: Darrick J. Wong Cc: Dave Chinner Cc: Greg Kroah-Hartman Cc: Joel Fernandes Cc: Kirill Tkhai Cc: Paul E. McKenney Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Steven Price Cc: Theodore Ts'o Cc: Vlastimil Babka Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Coly Li Cc: Dai Ngo Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Marijn Suijten Cc: "Michael S. 
Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Sean Paul Cc: Song Liu Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- drivers/gpu/drm/ttm/ttm_pool.c | 17 ++++++++++++++++- include/linux/shrinker.h | 1 - mm/shrinker.c | 15 --------------- 3 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index cddb9151d20f..648ca70403a7 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -74,6 +74,7 @@ static struct ttm_pool_type global_dma32_uncached[MAX_ORDER + 1]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; static struct shrinker mm_shrinker; +static DECLARE_RWSEM(pool_shrink_rwsem); /* Allocate pages of size 1 << order with the given gfp_flags */ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, @@ -317,6 +318,7 @@ static unsigned int ttm_pool_shrink(void) unsigned int num_pages; struct page *p; + down_read(&pool_shrink_rwsem); spin_lock(&shrinker_lock); pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list); list_move_tail(&pt->shrinker_list, &shrinker_list); @@ -329,6 +331,7 @@ static unsigned int ttm_pool_shrink(void) } else { num_pages = 0; } + up_read(&pool_shrink_rwsem); return num_pages; } @@ -572,6 +575,18 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, } EXPORT_SYMBOL(ttm_pool_init); +/** + * ttm_pool_synchronize_shrinkers - Wait for all running shrinkers to complete. + * + * This is useful to guarantee that all shrinker invocations have seen an + * update, before freeing memory, similar to rcu. 
+ */ +static void ttm_pool_synchronize_shrinkers(void) +{ + down_write(&pool_shrink_rwsem); + up_write(&pool_shrink_rwsem); +} + /** * ttm_pool_fini - Cleanup a pool * @@ -593,7 +608,7 @@ void ttm_pool_fini(struct ttm_pool *pool) /* We removed the pool types from the LRU, but we need to also make sure * that no shrinker is concurrently freeing pages from the pool. */ - synchronize_shrinkers(); + ttm_pool_synchronize_shrinkers(); } EXPORT_SYMBOL(ttm_pool_fini); diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 8dc15aa37410..6b5843c3b827 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -103,7 +103,6 @@ extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, const char *fmt, ...); extern void unregister_shrinker(struct shrinker *shrinker); extern void free_prealloced_shrinker(struct shrinker *shrinker); -extern void synchronize_shrinkers(void); #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, diff --git a/mm/shrinker.c b/mm/shrinker.c index 043c87ccfab4..a16cd448b924 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -692,18 +692,3 @@ void unregister_shrinker(struct shrinker *shrinker) shrinker->nr_deferred = NULL; } EXPORT_SYMBOL(unregister_shrinker); - -/** - * synchronize_shrinkers - Wait for all running shrinkers to complete. - * - * This is equivalent to calling unregister_shrink() and register_shrinker(), - * but atomically and with less overhead. This is useful to guarantee that all - * shrinker invocations have seen an update, before freeing memory, similar to - * rcu. 
- */ -void synchronize_shrinkers(void) -{ - down_write(&shrinker_rwsem); - up_write(&shrinker_rwsem); -} -EXPORT_SYMBOL(synchronize_shrinkers); -- cgit v1.2.3 From c42d50aefd17a6bad3ed617769edbbb579137545 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:00 +0800 Subject: mm: shrinker: add infrastructure for dynamically allocating shrinker Patch series "use refcount+RCU method to implement lockless slab shrink", v6. 1. Background ============= We used to implement the lockless slab shrink with SRCU [1], but then kernel test robot reported -88.8% regression in stress-ng.ramfs.ops_per_sec test case [2], so we reverted it [3]. This patch series aims to re-implement the lockless slab shrink using the refcount+RCU method proposed by Dave Chinner [4]. [1]. https://lore.kernel.org/lkml/20230313112819.38938-1-zhengqi.arch@bytedance.com/ [2]. https://lore.kernel.org/lkml/202305230837.db2c233f-yujie.liu@intel.com/ [3]. https://lore.kernel.org/all/20230609081518.3039120-1-qi.zheng@linux.dev/ [4]. https://lore.kernel.org/lkml/ZIJhou1d55d4H1s0@dread.disaster.area/ 2. Implementation ================= Currently, the shrinker instances can be divided into the following three types: a) global shrinker instance statically defined in the kernel, such as workingset_shadow_shrinker. b) global shrinker instance statically defined in the kernel modules, such as mmu_shrinker in x86. c) shrinker instance embedded in other structures. For case a, the memory of shrinker instance is never freed. For case b, the memory of shrinker instance will be freed after synchronize_rcu() when the module is unloaded. For case c, the memory of shrinker instance will be freed along with the structure it is embedded in. In preparation for implementing lockless slab shrink, we need to dynamically allocate those shrinker instances in case c, then the memory can be dynamically freed alone by calling kfree_rcu(). 
This patchset adds the following new APIs for dynamically allocating shrinker, and adds a private_data field to struct shrinker to record and get the original embedded structure. 1. shrinker_alloc() 2. shrinker_register() 3. shrinker_free() In order to simplify shrinker-related APIs and make shrinker more independent of other kernel mechanisms, this patchset uses the above APIs to convert all shrinkers (including case a and b) to dynamically allocated, and then remove all existing APIs. This will also have another advantage mentioned by Dave Chinner: ``` The other advantage of this is that it will break all the existing out of tree code and third party modules using the old API and will no longer work with a kernel using lockless slab shrinkers. They need to break (both at the source and binary levels) to stop bad things from happening due to using unconverted shrinkers in the new setup. ``` Then we free the shrinker by calling call_rcu(), and use rcu_read_{lock,unlock}() to ensure that the shrinker instance is valid. And the shrinker::refcount mechanism ensures that the shrinker instance will not be run again after unregistration. So the structure that records the pointer of shrinker instance can be safely freed without waiting for the RCU read-side critical section. In this way, while we implement the lockless slab shrink, we don't need to be blocked in unregister_shrinker() to wait RCU read-side critical section. PATCH 1: introduce new APIs PATCH 2~38: convert all shrinkers to use new APIs PATCH 39: remove old APIs PATCH 40~41: some cleanups and preparations PATCH 42~43: implement the lockless slab shrink PATCH 44~45: convert shrinker_rwsem to mutex 3.
Testing ========== 3.1 slab shrink stress test --------------------------- We can reproduce the down_read_trylock() hotspot through the following script: ``` DIR="/root/shrinker/memcg/mnt" do_create() { mkdir -p /sys/fs/cgroup/memory/test echo 4G > /sys/fs/cgroup/memory/test/memory.limit_in_bytes for i in `seq 0 $1`; do mkdir -p /sys/fs/cgroup/memory/test/$i; echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs; mkdir -p $DIR/$i; done } do_mount() { for i in `seq $1 $2`; do mount -t tmpfs $i $DIR/$i; done } do_touch() { for i in `seq $1 $2`; do echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs; dd if=/dev/zero of=$DIR/$i/file$i bs=1M count=1 & done } case "$1" in touch) do_touch $2 $3 ;; test) do_create 4000 do_mount 0 4000 do_touch 0 3000 ;; *) exit 1 ;; esac ``` Save the above script, then run test and touch commands. Then we can use the following perf command to view hotspots: perf top -U -F 999 1) Before applying this patchset: 33.15% [kernel] [k] down_read_trylock 25.38% [kernel] [k] shrink_slab 21.75% [kernel] [k] up_read 4.45% [kernel] [k] _find_next_bit 2.27% [kernel] [k] do_shrink_slab 1.80% [kernel] [k] intel_idle_irq 1.79% [kernel] [k] shrink_lruvec 0.67% [kernel] [k] xas_descend 0.41% [kernel] [k] mem_cgroup_iter 0.40% [kernel] [k] shrink_node 0.38% [kernel] [k] list_lru_count_one 2) After applying this patchset: 64.56% [kernel] [k] shrink_slab 12.18% [kernel] [k] do_shrink_slab 3.30% [kernel] [k] __rcu_read_unlock 2.61% [kernel] [k] shrink_lruvec 2.49% [kernel] [k] __rcu_read_lock 1.93% [kernel] [k] intel_idle_irq 0.89% [kernel] [k] shrink_node 0.81% [kernel] [k] mem_cgroup_iter 0.77% [kernel] [k] mem_cgroup_calculate_protection 0.66% [kernel] [k] list_lru_count_one We can see that the first perf hotspot becomes shrink_slab, which is what we expect. 
3.2 registration and unregistration stress test ----------------------------------------------- Run the command below to test: stress-ng --timeout 60 --times --verify --metrics-brief --ramfs 9 & 1) Before applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 473062 60.00 8.00 279.13 7884.12 1647.59 for a 60.01s run time: 1440.34s available CPU time 7.99s user time ( 0.55%) 279.13s system time ( 19.38%) 287.12s total time ( 19.93%) load average: 7.12 2.99 1.15 successful run completed in 60.01s (1 min, 0.01 secs) 2) After applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 477165 60.00 8.13 281.34 7952.55 1648.40 for a 60.01s run time: 1440.33s available CPU time 8.12s user time ( 0.56%) 281.34s system time ( 19.53%) 289.46s total time ( 20.10%) load average: 6.98 3.03 1.19 successful run completed in 60.01s (1 min, 0.01 secs) We can see that the ops/s has hardly changed. This patch (of 45): Currently, the shrinker instances can be divided into the following three types: a) global shrinker instance statically defined in the kernel, such as workingset_shadow_shrinker. b) global shrinker instance statically defined in the kernel modules, such as mmu_shrinker in x86. c) shrinker instance embedded in other structures. For case a, the memory of shrinker instance is never freed. For case b, the memory of shrinker instance will be freed after synchronize_rcu() when the module is unloaded. For case c, the memory of shrinker instance will be freed along with the structure it is embedded in. 
In preparation for implementing lockless slab shrink, we need to dynamically allocate those shrinker instances in case c, then the memory can be dynamically freed alone by calling kfree_rcu(). So this commit adds the following new APIs for dynamically allocating shrinker, and adds a private_data field to struct shrinker to record and get the original embedded structure. 1. shrinker_alloc() Used to allocate the shrinker instance itself and related memory; it returns a pointer to the shrinker instance on success and NULL on failure. 2. shrinker_register() Used to register the shrinker instance, which is the same as the current register_shrinker_prepared(). 3. shrinker_free() Used to unregister (if needed) and free the shrinker instance. In order to simplify shrinker-related APIs and make shrinker more independent of other kernel mechanisms, subsequent submissions will use the above APIs to convert all shrinkers (including case a and b) to dynamically allocated, and then remove all existing APIs. This will also have another advantage mentioned by Dave Chinner: ``` The other advantage of this is that it will break all the existing out of tree code and third party modules using the old API and will no longer work with a kernel using lockless slab shrinkers. They need to break (both at the source and binary levels) to stop bad things from happening due to using unconverted shrinkers in the new setup. ``` [zhengqi.arch@bytedance.com: mm: shrinker: some cleanup] Link: https://lkml.kernel.org/r/20230919024607.65463-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911094444.68966-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911094444.68966-2-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Christian Brauner Cc: Chuck Lever Cc: Darrick J. Wong Cc: Dave Chinner Cc: Greg Kroah-Hartman Cc: Kirill Tkhai Cc: Paul E.
McKenney Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Steven Price Cc: Theodore Ts'o Cc: Vlastimil Babka Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Koenig Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Sean Paul Cc: Song Liu Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 19 ++++++--- mm/internal.h | 22 ++++++++++ mm/shrinker.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++ mm/shrinker_debug.c | 3 -- 4 files changed, 142 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 6b5843c3b827..f4a5249f00b2 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -70,6 +70,8 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + void *private_data; + /* These are for internal use */ struct list_head list; #ifdef CONFIG_MEMCG @@ -86,15 +88,22 @@ struct shrinker { }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. 
*/ -/* Flags */ -#define SHRINKER_REGISTERED (1 << 0) -#define SHRINKER_NUMA_AWARE (1 << 1) -#define SHRINKER_MEMCG_AWARE (1 << 2) +/* Internal flags */ +#define SHRINKER_REGISTERED BIT(0) +#define SHRINKER_ALLOCATED BIT(1) + +/* Flags for users to use */ +#define SHRINKER_NUMA_AWARE BIT(2) +#define SHRINKER_MEMCG_AWARE BIT(3) /* * It just makes sense when the shrinker is also MEMCG_AWARE for now, * non-MEMCG_AWARE shrinker should not have this flag set. */ -#define SHRINKER_NONSLAB (1 << 3) +#define SHRINKER_NONSLAB BIT(4) + +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); +void shrinker_register(struct shrinker *shrinker); +void shrinker_free(struct shrinker *shrinker); extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...); diff --git a/mm/internal.h b/mm/internal.h index 0471d6326d01..a273f4d948d8 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1160,6 +1160,20 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); #ifdef CONFIG_SHRINKER_DEBUG +static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, + const char *fmt, va_list ap) +{ + shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); + + return shrinker->name ? 
0 : -ENOMEM; +} + +static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) +{ + kfree_const(shrinker->name); + shrinker->name = NULL; +} + extern int shrinker_debugfs_add(struct shrinker *shrinker); extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id); @@ -1170,6 +1184,14 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } +static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, + const char *fmt, va_list ap) +{ + return 0; +} +static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) +{ +} static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, int *debugfs_id) { diff --git a/mm/shrinker.c b/mm/shrinker.c index a16cd448b924..d1032a4d5684 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -550,6 +550,112 @@ out: return freed; } +struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...) +{ + struct shrinker *shrinker; + unsigned int size; + va_list ap; + int err; + + shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL); + if (!shrinker) + return NULL; + + va_start(ap, fmt); + err = shrinker_debugfs_name_alloc(shrinker, fmt, ap); + va_end(ap); + if (err) + goto err_name; + + shrinker->flags = flags | SHRINKER_ALLOCATED; + shrinker->seeks = DEFAULT_SEEKS; + + if (flags & SHRINKER_MEMCG_AWARE) { + err = prealloc_memcg_shrinker(shrinker); + if (err == -ENOSYS) { + /* Memcg is not supported, fallback to non-memcg-aware shrinker. 
*/ + shrinker->flags &= ~SHRINKER_MEMCG_AWARE; + goto non_memcg; + } + + if (err) + goto err_flags; + + return shrinker; + } + +non_memcg: + /* + * The nr_deferred is available on per memcg level for memcg aware + * shrinkers, so only allocate nr_deferred in the following cases: + * - non-memcg-aware shrinkers + * - !CONFIG_MEMCG + * - memcg is disabled by kernel command line + */ + size = sizeof(*shrinker->nr_deferred); + if (flags & SHRINKER_NUMA_AWARE) + size *= nr_node_ids; + + shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); + if (!shrinker->nr_deferred) + goto err_flags; + + return shrinker; + +err_flags: + shrinker_debugfs_name_free(shrinker); +err_name: + kfree(shrinker); + return NULL; +} +EXPORT_SYMBOL_GPL(shrinker_alloc); + +void shrinker_register(struct shrinker *shrinker) +{ + if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) { + pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker"); + return; + } + + down_write(&shrinker_rwsem); + list_add_tail(&shrinker->list, &shrinker_list); + shrinker->flags |= SHRINKER_REGISTERED; + shrinker_debugfs_add(shrinker); + up_write(&shrinker_rwsem); +} +EXPORT_SYMBOL_GPL(shrinker_register); + +void shrinker_free(struct shrinker *shrinker) +{ + struct dentry *debugfs_entry = NULL; + int debugfs_id; + + if (!shrinker) + return; + + down_write(&shrinker_rwsem); + if (shrinker->flags & SHRINKER_REGISTERED) { + list_del(&shrinker->list); + debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); + shrinker->flags &= ~SHRINKER_REGISTERED; + } + + shrinker_debugfs_name_free(shrinker); + + if (shrinker->flags & SHRINKER_MEMCG_AWARE) + unregister_memcg_shrinker(shrinker); + up_write(&shrinker_rwsem); + + if (debugfs_entry) + shrinker_debugfs_remove(debugfs_entry, debugfs_id); + + kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; + + kfree(shrinker); +} +EXPORT_SYMBOL_GPL(shrinker_free); + /* * Add a shrinker callback to be called from the vm. 
*/ diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index e4ce509f619e..24aebe7c24cc 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -241,9 +241,6 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, lockdep_assert_held(&shrinker_rwsem); - kfree_const(shrinker->name); - shrinker->name = NULL; - *debugfs_id = entry ? shrinker->debugfs_id : -1; shrinker->debugfs_entry = NULL; -- cgit v1.2.3 From 4b403dfa8ea8dfbc9d317b25a0433c1ac6765f7f Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:30 +0800 Subject: jbd2,ext4: dynamically allocate the jbd2-journal shrinker In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the jbd2-journal shrinker, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct journal_s. Link: https://lkml.kernel.org/r/20230911094444.68966-32-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: Jan Kara Cc: "Theodore Ts'o" Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. 
Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- fs/jbd2/journal.c | 29 ++++++++++++++++++----------- include/linux/jbd2.h | 2 +- 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 30dec2bd2ecc..ed53188472f9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1290,7 +1290,7 @@ static int jbd2_min_tag_size(void) static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - journal_t *journal = container_of(shrink, journal_t, j_shrinker); + journal_t *journal = shrink->private_data; unsigned long nr_to_scan = sc->nr_to_scan; unsigned long nr_shrunk; unsigned long count; @@ -1316,7 +1316,7 @@ static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink, static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - journal_t *journal = container_of(shrink, journal_t, j_shrinker); + journal_t *journal = shrink->private_data; unsigned long count; count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count); @@ -1588,14 +1588,21 @@ static journal_t *journal_init_common(struct block_device *bdev, goto err_cleanup; journal->j_shrink_transaction = NULL; - journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan; - journal->j_shrinker.count_objects = jbd2_journal_shrink_count; - journal->j_shrinker.seeks = DEFAULT_SEEKS; - journal->j_shrinker.batch = journal->j_max_transaction_buffers; - err = register_shrinker(&journal->j_shrinker, 
"jbd2-journal:(%u:%u)", - MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); - if (err) + + journal->j_shrinker = shrinker_alloc(0, "jbd2-journal:(%u:%u)", + MAJOR(bdev->bd_dev), + MINOR(bdev->bd_dev)); + if (!journal->j_shrinker) { + err = -ENOMEM; goto err_cleanup; + } + + journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan; + journal->j_shrinker->count_objects = jbd2_journal_shrink_count; + journal->j_shrinker->batch = journal->j_max_transaction_buffers; + journal->j_shrinker->private_data = journal; + + shrinker_register(journal->j_shrinker); return journal; @@ -2172,9 +2179,9 @@ int jbd2_journal_destroy(journal_t *journal) brelse(journal->j_sb_buffer); } - if (journal->j_shrinker.flags & SHRINKER_REGISTERED) { + if (journal->j_shrinker) { percpu_counter_destroy(&journal->j_checkpoint_jh_count); - unregister_shrinker(&journal->j_shrinker); + shrinker_free(journal->j_shrinker); } if (journal->j_proc_entry) jbd2_stats_proc_exit(journal); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 52772c826c86..6dcbb4eb80fb 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -886,7 +886,7 @@ struct journal_s * Journal head shrinker, reclaim buffer's journal head which * has been written back. */ - struct shrinker j_shrinker; + struct shrinker *j_shrinker; /** * @j_checkpoint_jh_count: -- cgit v1.2.3 From 1720f5dd8d3af34d6023fb9e8c35e5e60e8b6643 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:37 +0800 Subject: fs: super: dynamically allocate the s_shrink In preparation for implementing lockless slab shrink, use new APIs to dynamically allocate the s_shrink, so that it can be freed asynchronously via RCU. Then it doesn't need to wait for RCU read-side critical section when releasing the struct super_block. 
Link: https://lkml.kernel.org/r/20230911094444.68966-39-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: Alexander Viro Cc: Christian Brauner Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. 
McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- fs/btrfs/super.c | 2 +- fs/kernfs/mount.c | 2 +- fs/proc/root.c | 2 +- fs/super.c | 33 ++++++++++++++++++--------------- include/linux/fs.h | 2 +- 5 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1a093ec0f7e3..b1798bed68f2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1519,7 +1519,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, error = -EBUSY; } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); - shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name, + shrinker_debugfs_rename(s->s_shrink, "sb-%s:%s", fs_type->name, s->s_id); btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data); diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index c4bf26142eec..79b96e74a8a0 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -265,7 +265,7 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k sb->s_time_gran = 1; /* sysfs dentries and inodes don't require IO to create */ - sb->s_shrink.seeks = 0; + sb->s_shrink->seeks = 0; /* get root inode, initialize and unlock it */ down_read(&kf_root->kernfs_rwsem); diff --git a/fs/proc/root.c b/fs/proc/root.c index 9191248f2dac..b55dbc70287b 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -188,7 +188,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc) s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; /* procfs dentries and inodes don't require IO to create */ - s->s_shrink.seeks = 0; + s->s_shrink->seeks = 0; pde_get(&proc_root); root_inode 
= proc_get_inode(s, &proc_root); diff --git a/fs/super.c b/fs/super.c index 2d762ce67f6e..adadf6689611 100644 --- a/fs/super.c +++ b/fs/super.c @@ -191,7 +191,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink, long dentries; long inodes; - sb = container_of(shrink, struct super_block, s_shrink); + sb = shrink->private_data; /* * Deadlock avoidance. We may hold various FS locks, and we don't want @@ -244,7 +244,7 @@ static unsigned long super_cache_count(struct shrinker *shrink, struct super_block *sb; long total_objects = 0; - sb = container_of(shrink, struct super_block, s_shrink); + sb = shrink->private_data; /* * We don't call super_trylock_shared() here as it is a scalability @@ -306,7 +306,7 @@ static void destroy_unused_super(struct super_block *s) security_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); - free_prealloced_shrinker(&s->s_shrink); + shrinker_free(s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); } @@ -383,16 +383,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_time_min = TIME64_MIN; s->s_time_max = TIME64_MAX; - s->s_shrink.seeks = DEFAULT_SEEKS; - s->s_shrink.scan_objects = super_cache_scan; - s->s_shrink.count_objects = super_cache_count; - s->s_shrink.batch = 1024; - s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE; - if (prealloc_shrinker(&s->s_shrink, "sb-%s", type->name)) + s->s_shrink = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, + "sb-%s", type->name); + if (!s->s_shrink) goto fail; - if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink)) + + s->s_shrink->scan_objects = super_cache_scan; + s->s_shrink->count_objects = super_cache_count; + s->s_shrink->batch = 1024; + s->s_shrink->private_data = s; + + if (list_lru_init_memcg(&s->s_dentry_lru, s->s_shrink)) goto fail; - if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink)) + if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink)) goto fail; return s; @@ -477,7 
+480,7 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { - unregister_shrinker(&s->s_shrink); + shrinker_free(s->s_shrink); fs->kill_sb(s); kill_super_notify(s); @@ -818,7 +821,7 @@ retry: hlist_add_head(&s->s_instances, &s->s_type->fs_supers); spin_unlock(&sb_lock); get_filesystem(s->s_type); - register_shrinker_prepared(&s->s_shrink); + shrinker_register(s->s_shrink); return s; share_extant_sb: @@ -901,7 +904,7 @@ retry: hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - register_shrinker_prepared(&s->s_shrink); + shrinker_register(s->s_shrink); return s; } EXPORT_SYMBOL(sget); @@ -1522,7 +1525,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, mutex_unlock(&bdev->bd_fsfreeze_mutex); snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); - shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name, + shrinker_debugfs_rename(sb->s_shrink, "sb-%s:%s", sb->s_type->name, sb->s_id); sb_set_blocksize(sb, block_size(bdev)); return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ff..fd539c9fef8e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1265,7 +1265,7 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ - struct shrinker s_shrink; /* per-sb shrinker handle */ + struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; -- cgit v1.2.3 From f2383e01507eeee8a1c1283d61a117a97d6c4ebe Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:38 +0800 Subject: mm: shrinker: remove old APIs Now no users are using the old APIs, just remove them. 
Link: https://lkml.kernel.org/r/20230911094444.68966-40-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. 
McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 8 --- mm/shrinker.c | 143 ----------------------------------------------- 2 files changed, 151 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index f4a5249f00b2..1d3899f37229 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -105,14 +105,6 @@ struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); -extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void register_shrinker_prepared(struct shrinker *shrinker); -extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker, - const char *fmt, ...); -extern void unregister_shrinker(struct shrinker *shrinker); -extern void free_prealloced_shrinker(struct shrinker *shrinker); - #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); diff --git a/mm/shrinker.c b/mm/shrinker.c index d1032a4d5684..736fa67e8454 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -655,146 +655,3 @@ void shrinker_free(struct shrinker *shrinker) kfree(shrinker); } EXPORT_SYMBOL_GPL(shrinker_free); - -/* - * Add a shrinker callback to be called from the vm. 
- */ -static int __prealloc_shrinker(struct shrinker *shrinker) -{ - unsigned int size; - int err; - - if (shrinker->flags & SHRINKER_MEMCG_AWARE) { - err = prealloc_memcg_shrinker(shrinker); - if (err != -ENOSYS) - return err; - - shrinker->flags &= ~SHRINKER_MEMCG_AWARE; - } - - size = sizeof(*shrinker->nr_deferred); - if (shrinker->flags & SHRINKER_NUMA_AWARE) - size *= nr_node_ids; - - shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); - if (!shrinker->nr_deferred) - return -ENOMEM; - - return 0; -} - -#ifdef CONFIG_SHRINKER_DEBUG -int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - va_list ap; - int err; - - va_start(ap, fmt); - shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); - va_end(ap); - if (!shrinker->name) - return -ENOMEM; - - err = __prealloc_shrinker(shrinker); - if (err) { - kfree_const(shrinker->name); - shrinker->name = NULL; - } - - return err; -} -#else -int prealloc_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - return __prealloc_shrinker(shrinker); -} -#endif - -void free_prealloced_shrinker(struct shrinker *shrinker) -{ -#ifdef CONFIG_SHRINKER_DEBUG - kfree_const(shrinker->name); - shrinker->name = NULL; -#endif - if (shrinker->flags & SHRINKER_MEMCG_AWARE) { - down_write(&shrinker_rwsem); - unregister_memcg_shrinker(shrinker); - up_write(&shrinker_rwsem); - return; - } - - kfree(shrinker->nr_deferred); - shrinker->nr_deferred = NULL; -} - -void register_shrinker_prepared(struct shrinker *shrinker) -{ - down_write(&shrinker_rwsem); - list_add_tail(&shrinker->list, &shrinker_list); - shrinker->flags |= SHRINKER_REGISTERED; - shrinker_debugfs_add(shrinker); - up_write(&shrinker_rwsem); -} - -static int __register_shrinker(struct shrinker *shrinker) -{ - int err = __prealloc_shrinker(shrinker); - - if (err) - return err; - register_shrinker_prepared(shrinker); - return 0; -} - -#ifdef CONFIG_SHRINKER_DEBUG -int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) 
-{ - va_list ap; - int err; - - va_start(ap, fmt); - shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); - va_end(ap); - if (!shrinker->name) - return -ENOMEM; - - err = __register_shrinker(shrinker); - if (err) { - kfree_const(shrinker->name); - shrinker->name = NULL; - } - return err; -} -#else -int register_shrinker(struct shrinker *shrinker, const char *fmt, ...) -{ - return __register_shrinker(shrinker); -} -#endif -EXPORT_SYMBOL(register_shrinker); - -/* - * Remove one - */ -void unregister_shrinker(struct shrinker *shrinker) -{ - struct dentry *debugfs_entry; - int debugfs_id; - - if (!(shrinker->flags & SHRINKER_REGISTERED)) - return; - - down_write(&shrinker_rwsem); - list_del(&shrinker->list); - shrinker->flags &= ~SHRINKER_REGISTERED; - if (shrinker->flags & SHRINKER_MEMCG_AWARE) - unregister_memcg_shrinker(shrinker); - debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); - up_write(&shrinker_rwsem); - - shrinker_debugfs_remove(debugfs_entry, debugfs_id); - - kfree(shrinker->nr_deferred); - shrinker->nr_deferred = NULL; -} -EXPORT_SYMBOL(unregister_shrinker); -- cgit v1.2.3 From 307bececcd1205bcb67a3c0d53a69db237ccc9d4 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:39 +0800 Subject: mm: shrinker: add a secondary array for shrinker_info::{map, nr_deferred} Currently, we maintain two linear arrays per node per memcg, which are shrinker_info::map and shrinker_info::nr_deferred. And we need to resize them when the shrinker_nr_max is exceeded, that is, allocate a new array, and then copy the old array to the new array, and finally free the old array by RCU. For shrinker_info::map, we do set_bit() under the RCU lock, so we may set the value into the old map which is about to be freed. This may cause the value set to be lost. The current solution is not to copy the old map when resizing, but to set all the corresponding bits in the new map to 1. 
This solves the data loss problem, but brings the overhead of more pointless loops while doing memcg slab shrink. For shrinker_info::nr_deferred, we will only modify it under the read lock of shrinker_rwsem, so it will not run concurrently with the resizing. But after we make memcg slab shrink lockless, there will be the same data loss problem as shrinker_info::map, and we can't work around it like the map. For such resizable arrays, the most straightforward idea is to change it to xarray, like we did for list_lru [1]. We need to do xa_store() in the list_lru_add()-->set_shrinker_bit(), but this will cause memory allocation, and the list_lru_add() doesn't accept failure. A possible solution is to pre-allocate, but the location of pre-allocation is not well determined (such as the deferred_split_shrinker case). Therefore, this commit chooses to introduce the following secondary array for shrinker_info::{map, nr_deferred}: +---------------+--------+--------+-----+ | shrinker_info | unit 0 | unit 1 | ... | (secondary array) +---------------+--------+--------+-----+ | v +---------------+-----+ | nr_deferred[] | map | (leaf array) +---------------+-----+ (shrinker_info_unit) The leaf array is never freed unless the memcg is destroyed. The secondary array will be resized every time the shrinker id exceeds shrinker_nr_max. So the shrinker_info_unit can be indexed from both the old and the new shrinker_info->unit[x]. Then even if we get the old secondary array under the RCU lock, the found map and nr_deferred are still valid, so the updated nr_deferred and map will not be lost. [1].
https://lore.kernel.org/all/20220228122126.37293-13-songmuchun@bytedance.com/ [zhengqi.arch@bytedance.com: unlock the &shrinker_rwsem before the call to free_shrinker_info()] Link: https://lkml.kernel.org/r/20230928141517.12164-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20230911094444.68966-41-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. 
McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 12 +-- include/linux/shrinker.h | 17 +++ mm/shrinker.c | 250 ++++++++++++++++++++++++++++----------------- 3 files changed, 172 insertions(+), 107 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4e24da16d2c..031102ac9311 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -21,6 +21,7 @@ #include #include #include +#include struct mem_cgroup; struct obj_cgroup; @@ -88,17 +89,6 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; -/* - * Bitmap and deferred work of shrinker::id corresponding to memcg-aware - * shrinkers, which have elements charged to this memcg. - */ -struct shrinker_info { - struct rcu_head rcu; - atomic_long_t *nr_deferred; - unsigned long *map; - int map_nr_max; -}; - struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ long state[NR_VM_NODE_STAT_ITEMS]; diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 1d3899f37229..ba5ac82b5dbd 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -5,6 +5,23 @@ #include #include +#define SHRINKER_UNIT_BITS BITS_PER_LONG + +/* + * Bitmap and deferred work of shrinker::id corresponding to memcg-aware + * shrinkers, which have elements charged to the memcg. 
+ */ +struct shrinker_info_unit { + atomic_long_t nr_deferred[SHRINKER_UNIT_BITS]; + DECLARE_BITMAP(map, SHRINKER_UNIT_BITS); +}; + +struct shrinker_info { + struct rcu_head rcu; + int map_nr_max; + struct shrinker_info_unit *unit[]; +}; + /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extension later. diff --git a/mm/shrinker.c b/mm/shrinker.c index 736fa67e8454..e9644cda80b5 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -12,15 +12,50 @@ DECLARE_RWSEM(shrinker_rwsem); #ifdef CONFIG_MEMCG static int shrinker_nr_max; -/* The shrinker_info is expanded in a batch of BITS_PER_LONG */ -static inline int shrinker_map_size(int nr_items) +static inline int shrinker_unit_size(int nr_items) { - return (DIV_ROUND_UP(nr_items, BITS_PER_LONG) * sizeof(unsigned long)); + return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *)); } -static inline int shrinker_defer_size(int nr_items) +static inline void shrinker_unit_free(struct shrinker_info *info, int start) { - return (round_up(nr_items, BITS_PER_LONG) * sizeof(atomic_long_t)); + struct shrinker_info_unit **unit; + int nr, i; + + if (!info) + return; + + unit = info->unit; + nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS); + + for (i = start; i < nr; i++) { + if (!unit[i]) + break; + + kfree(unit[i]); + unit[i] = NULL; + } +} + +static inline int shrinker_unit_alloc(struct shrinker_info *new, + struct shrinker_info *old, int nid) +{ + struct shrinker_info_unit *unit; + int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS); + int start = old ? 
DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0; + int i; + + for (i = start; i < nr; i++) { + unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid); + if (!unit) { + shrinker_unit_free(new, start); + return -ENOMEM; + } + + new->unit[i] = unit; + } + + return 0; } void free_shrinker_info(struct mem_cgroup *memcg) @@ -32,6 +67,7 @@ void free_shrinker_info(struct mem_cgroup *memcg) for_each_node(nid) { pn = memcg->nodeinfo[nid]; info = rcu_dereference_protected(pn->shrinker_info, true); + shrinker_unit_free(info, 0); kvfree(info); rcu_assign_pointer(pn->shrinker_info, NULL); } @@ -40,28 +76,28 @@ void free_shrinker_info(struct mem_cgroup *memcg) int alloc_shrinker_info(struct mem_cgroup *memcg) { struct shrinker_info *info; - int nid, size, ret = 0; - int map_size, defer_size = 0; + int nid, ret = 0; + int array_size = 0; down_write(&shrinker_rwsem); - map_size = shrinker_map_size(shrinker_nr_max); - defer_size = shrinker_defer_size(shrinker_nr_max); - size = map_size + defer_size; + array_size = shrinker_unit_size(shrinker_nr_max); for_each_node(nid) { - info = kvzalloc_node(sizeof(*info) + size, GFP_KERNEL, nid); - if (!info) { - free_shrinker_info(memcg); - ret = -ENOMEM; - break; - } - info->nr_deferred = (atomic_long_t *)(info + 1); - info->map = (void *)info->nr_deferred + defer_size; + info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid); + if (!info) + goto err; info->map_nr_max = shrinker_nr_max; + if (shrinker_unit_alloc(info, NULL, nid)) + goto err; rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); } up_write(&shrinker_rwsem); return ret; + +err: + up_write(&shrinker_rwsem); + free_shrinker_info(memcg); + return -ENOMEM; } static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, @@ -71,15 +107,12 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, lockdep_is_held(&shrinker_rwsem)); } -static int expand_one_shrinker_info(struct mem_cgroup *memcg, - int map_size, int 
defer_size, - int old_map_size, int old_defer_size, - int new_nr_max) +static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size, + int old_size, int new_nr_max) { struct shrinker_info *new, *old; struct mem_cgroup_per_node *pn; int nid; - int size = map_size + defer_size; for_each_node(nid) { pn = memcg->nodeinfo[nid]; @@ -92,21 +125,17 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, if (new_nr_max <= old->map_nr_max) continue; - new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid); + new = kvmalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid); if (!new) return -ENOMEM; - new->nr_deferred = (atomic_long_t *)(new + 1); - new->map = (void *)new->nr_deferred + defer_size; new->map_nr_max = new_nr_max; - /* map: set all old bits, clear all new bits */ - memset(new->map, (int)0xff, old_map_size); - memset((void *)new->map + old_map_size, 0, map_size - old_map_size); - /* nr_deferred: copy old values, clear all new values */ - memcpy(new->nr_deferred, old->nr_deferred, old_defer_size); - memset((void *)new->nr_deferred + old_defer_size, 0, - defer_size - old_defer_size); + memcpy(new->unit, old->unit, old_size); + if (shrinker_unit_alloc(new, old, nid)) { + kvfree(new); + return -ENOMEM; + } rcu_assign_pointer(pn->shrinker_info, new); kvfree_rcu(old, rcu); @@ -118,9 +147,8 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, static int expand_shrinker_info(int new_id) { int ret = 0; - int new_nr_max = round_up(new_id + 1, BITS_PER_LONG); - int map_size, defer_size = 0; - int old_map_size, old_defer_size = 0; + int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS); + int new_size, old_size = 0; struct mem_cgroup *memcg; if (!root_mem_cgroup) @@ -128,15 +156,12 @@ static int expand_shrinker_info(int new_id) lockdep_assert_held(&shrinker_rwsem); - map_size = shrinker_map_size(new_nr_max); - defer_size = shrinker_defer_size(new_nr_max); - old_map_size = shrinker_map_size(shrinker_nr_max); - old_defer_size = 
shrinker_defer_size(shrinker_nr_max); + new_size = shrinker_unit_size(new_nr_max); + old_size = shrinker_unit_size(shrinker_nr_max); memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - ret = expand_one_shrinker_info(memcg, map_size, defer_size, - old_map_size, old_defer_size, + ret = expand_one_shrinker_info(memcg, new_size, old_size, new_nr_max); if (ret) { mem_cgroup_iter_break(NULL, memcg); @@ -150,17 +175,34 @@ out: return ret; } +static inline int shrinker_id_to_index(int shrinker_id) +{ + return shrinker_id / SHRINKER_UNIT_BITS; +} + +static inline int shrinker_id_to_offset(int shrinker_id) +{ + return shrinker_id % SHRINKER_UNIT_BITS; +} + +static inline int calc_shrinker_id(int index, int offset) +{ + return index * SHRINKER_UNIT_BITS + offset; +} + void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) { struct shrinker_info *info; + struct shrinker_info_unit *unit; rcu_read_lock(); info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); + unit = info->unit[shrinker_id_to_index(shrinker_id)]; if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) { /* Pairs with smp mb in shrink_slab() */ smp_mb__before_atomic(); - set_bit(shrinker_id, info->map); + set_bit(shrinker_id_to_offset(shrinker_id), unit->map); } rcu_read_unlock(); } @@ -209,26 +251,31 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, struct mem_cgroup *memcg) { struct shrinker_info *info; + struct shrinker_info_unit *unit; info = shrinker_info_protected(memcg, nid); - return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0); + unit = info->unit[shrinker_id_to_index(shrinker->id)]; + return atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0); } static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, struct mem_cgroup *memcg) { struct shrinker_info *info; + struct shrinker_info_unit *unit; info = shrinker_info_protected(memcg, nid); - return 
atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]); + unit = info->unit[shrinker_id_to_index(shrinker->id)]; + return atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]); } void reparent_shrinker_deferred(struct mem_cgroup *memcg) { - int i, nid; + int nid, index, offset; long nr; struct mem_cgroup *parent; struct shrinker_info *child_info, *parent_info; + struct shrinker_info_unit *child_unit, *parent_unit; parent = parent_mem_cgroup(memcg); if (!parent) @@ -239,9 +286,13 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) for_each_node(nid) { child_info = shrinker_info_protected(memcg, nid); parent_info = shrinker_info_protected(parent, nid); - for (i = 0; i < child_info->map_nr_max; i++) { - nr = atomic_long_read(&child_info->nr_deferred[i]); - atomic_long_add(nr, &parent_info->nr_deferred[i]); + for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) { + child_unit = child_info->unit[index]; + parent_unit = parent_info->unit[index]; + for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) { + nr = atomic_long_read(&child_unit->nr_deferred[offset]); + atomic_long_add(nr, &parent_unit->nr_deferred[offset]); + } } } up_read(&shrinker_rwsem); @@ -407,7 +458,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, { struct shrinker_info *info; unsigned long ret, freed = 0; - int i; + int offset, index = 0; if (!mem_cgroup_online(memcg)) return 0; @@ -419,56 +470,63 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, if (unlikely(!info)) goto unlock; - for_each_set_bit(i, info->map, info->map_nr_max) { - struct shrink_control sc = { - .gfp_mask = gfp_mask, - .nid = nid, - .memcg = memcg, - }; - struct shrinker *shrinker; + for (; index < shrinker_id_to_index(info->map_nr_max); index++) { + struct shrinker_info_unit *unit; - shrinker = idr_find(&shrinker_idr, i); - if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { - if (!shrinker) - clear_bit(i, 
info->map); - continue; - } + unit = info->unit[index]; - /* Call non-slab shrinkers even though kmem is disabled */ - if (!memcg_kmem_online() && - !(shrinker->flags & SHRINKER_NONSLAB)) - continue; + for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) { + struct shrink_control sc = { + .gfp_mask = gfp_mask, + .nid = nid, + .memcg = memcg, + }; + struct shrinker *shrinker; + int shrinker_id = calc_shrinker_id(index, offset); - ret = do_shrink_slab(&sc, shrinker, priority); - if (ret == SHRINK_EMPTY) { - clear_bit(i, info->map); - /* - * After the shrinker reported that it had no objects to - * free, but before we cleared the corresponding bit in - * the memcg shrinker map, a new object might have been - * added. To make sure, we have the bit set in this - * case, we invoke the shrinker one more time and reset - * the bit if it reports that it is not empty anymore. - * The memory barrier here pairs with the barrier in - * set_shrinker_bit(): - * - * list_lru_add() shrink_slab_memcg() - * list_add_tail() clear_bit() - * - * set_bit() do_shrink_slab() - */ - smp_mb__after_atomic(); - ret = do_shrink_slab(&sc, shrinker, priority); - if (ret == SHRINK_EMPTY) - ret = 0; - else - set_shrinker_bit(memcg, nid, i); - } - freed += ret; + shrinker = idr_find(&shrinker_idr, shrinker_id); + if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { + if (!shrinker) + clear_bit(offset, unit->map); + continue; + } - if (rwsem_is_contended(&shrinker_rwsem)) { - freed = freed ? : 1; - break; + /* Call non-slab shrinkers even though kmem is disabled */ + if (!memcg_kmem_online() && + !(shrinker->flags & SHRINKER_NONSLAB)) + continue; + + ret = do_shrink_slab(&sc, shrinker, priority); + if (ret == SHRINK_EMPTY) { + clear_bit(offset, unit->map); + /* + * After the shrinker reported that it had no objects to + * free, but before we cleared the corresponding bit in + * the memcg shrinker map, a new object might have been + * added. 
To make sure, we have the bit set in this + * case, we invoke the shrinker one more time and reset + * the bit if it reports that it is not empty anymore. + * The memory barrier here pairs with the barrier in + * set_shrinker_bit(): + * + * list_lru_add() shrink_slab_memcg() + * list_add_tail() clear_bit() + * + * set_bit() do_shrink_slab() + */ + smp_mb__after_atomic(); + ret = do_shrink_slab(&sc, shrinker, priority); + if (ret == SHRINK_EMPTY) + ret = 0; + else + set_shrinker_bit(memcg, nid, shrinker_id); + } + freed += ret; + + if (rwsem_is_contended(&shrinker_rwsem)) { + freed = freed ? : 1; + goto unlock; + } } } unlock: -- cgit v1.2.3 From ca1d36b823944f24b5755311e95883fb5fdb807b Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Mon, 11 Sep 2023 17:44:41 +0800 Subject: mm: shrinker: make global slab shrink lockless The shrinker_rwsem is a global read-write lock in shrinkers subsystem, which protects most operations such as slab shrink, registration and unregistration of shrinkers, etc. This can easily cause problems in the following cases. 1) When the memory pressure is high and there are many filesystems mounted or unmounted at the same time, slab shrink will be affected (down_read_trylock() failed). Such as the real workload mentioned by Kirill Tkhai: ``` One of the real workloads from my experience is start of an overcommitted node containing many starting containers after node crash (or many resuming containers after reboot for kernel update). In these cases memory pressure is huge, and the node goes round in long reclaim. ``` 2) If a shrinker is blocked (such as the case mentioned in [1]) and a writer comes in (such as mount a fs), then this writer will be blocked and cause all subsequent shrinker-related operations to be blocked. Even if there is no competitor when shrinking slab, there may still be a problem. The down_read_trylock() may become a perf hotspot with frequent calls to shrink_slab(). 
Because of the poor multicore scalability of atomic operations, this can lead to a significant drop in IPC (instructions per cycle). We used to implement the lockless slab shrink with SRCU [2], but then kernel test robot reported -88.8% regression in stress-ng.ramfs.ops_per_sec test case [3], so we reverted it [4]. This commit uses the refcount+RCU method [5] proposed by Dave Chinner to re-implement the lockless global slab shrink. The memcg slab shrink is handled in the subsequent patch. For now, all shrinker instances are converted to dynamically allocated and will be freed by call_rcu(). So we can use rcu_read_{lock,unlock}() to ensure that the shrinker instance is valid. And the shrinker instance will not be run again after unregistration. So the structure that records the pointer of shrinker instance can be safely freed without waiting for the RCU read-side critical section. In this way, while we implement the lockless slab shrink, we don't need to be blocked in unregister_shrinker(). 
The following are the test results: stress-ng --timeout 60 --times --verify --metrics-brief --ramfs 9 & 1) Before applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 473062 60.00 8.00 279.13 7884.12 1647.59 for a 60.01s run time: 1440.34s available CPU time 7.99s user time ( 0.55%) 279.13s system time ( 19.38%) 287.12s total time ( 19.93%) load average: 7.12 2.99 1.15 successful run completed in 60.01s (1 min, 0.01 secs) 2) After applying this patchset: setting to a 60 second run per stressor dispatching hogs: 9 ramfs stressor bogo ops real time usr time sys time bogo ops/s bogo ops/s (secs) (secs) (secs) (real time) (usr+sys time) ramfs 477165 60.00 8.13 281.34 7952.55 1648.40 for a 60.01s run time: 1440.33s available CPU time 8.12s user time ( 0.56%) 281.34s system time ( 19.53%) 289.46s total time ( 20.10%) load average: 6.98 3.03 1.19 successful run completed in 60.01s (1 min, 0.01 secs) We can see that the ops/s has hardly changed. [1]. https://lore.kernel.org/lkml/20191129214541.3110-1-ptikhomirov@virtuozzo.com/ [2]. https://lore.kernel.org/lkml/20230313112819.38938-1-zhengqi.arch@bytedance.com/ [3]. https://lore.kernel.org/lkml/202305230837.db2c233f-yujie.liu@intel.com/ [4]. https://lore.kernel.org/all/20230609081518.3039120-1-qi.zheng@linux.dev/ [5]. 
https://lore.kernel.org/lkml/ZIJhou1d55d4H1s0@dread.disaster.area/ Link: https://lkml.kernel.org/r/20230911094444.68966-43-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Cc: Abhinav Kumar Cc: Alasdair Kergon Cc: Alexander Viro Cc: Alyssa Rosenzweig Cc: Andreas Dilger Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Bob Peterson Cc: Borislav Petkov Cc: Carlos Llamas Cc: Chandan Babu R Cc: Chao Yu Cc: Chris Mason Cc: Christian Brauner Cc: Christian Koenig Cc: Chuck Lever Cc: Coly Li Cc: Dai Ngo Cc: Daniel Vetter Cc: Daniel Vetter Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: David Airlie Cc: David Hildenbrand Cc: David Sterba Cc: Dmitry Baryshkov Cc: Gao Xiang Cc: Greg Kroah-Hartman Cc: Huang Rui Cc: Ingo Molnar Cc: Jaegeuk Kim Cc: Jani Nikula Cc: Jan Kara Cc: Jason Wang Cc: Jeff Layton Cc: Jeffle Xu Cc: Joel Fernandes (Google) Cc: Joonas Lahtinen Cc: Josef Bacik Cc: Juergen Gross Cc: Kent Overstreet Cc: Kirill Tkhai Cc: Marijn Suijten Cc: "Michael S. Tsirkin" Cc: Mike Snitzer Cc: Minchan Kim Cc: Muchun Song Cc: Muchun Song Cc: Nadav Amit Cc: Neil Brown Cc: Oleksandr Tyshchenko Cc: Olga Kornievskaia Cc: Paul E. 
McKenney Cc: Richard Weinberger Cc: Rob Clark Cc: Rob Herring Cc: Rodrigo Vivi Cc: Roman Gushchin Cc: Sean Paul Cc: Sergey Senozhatsky Cc: Song Liu Cc: Stefano Stabellini Cc: Steven Price Cc: "Theodore Ts'o" Cc: Thomas Gleixner Cc: Tomeu Vizoso Cc: Tom Talpey Cc: Trond Myklebust Cc: Tvrtko Ursulin Cc: Vlastimil Babka Cc: Xuan Zhuo Cc: Yue Hu Signed-off-by: Andrew Morton --- include/linux/shrinker.h | 24 +++++++++++++ mm/shrinker.c | 89 ++++++++++++++++++++++++++++++++++++------------ 2 files changed, 92 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index ba5ac82b5dbd..e4f93120e0ab 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -4,6 +4,8 @@ #include #include +#include +#include #define SHRINKER_UNIT_BITS BITS_PER_LONG @@ -87,6 +89,17 @@ struct shrinker { int seeks; /* seeks to recreate an obj */ unsigned flags; + /* + * The reference count of this shrinker. Registered shrinker have an + * initial refcount of 1, then the lookup operations are now allowed + * to use it via shrinker_try_get(). Later in the unregistration step, + * the initial refcount will be discarded, and will free the shrinker + * asynchronously via RCU after its refcount reaches 0. 
+ */ + refcount_t refcount; + struct completion done; /* use to wait for refcount to reach 0 */ + struct rcu_head rcu; + void *private_data; /* These are for internal use */ @@ -122,6 +135,17 @@ struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...); void shrinker_register(struct shrinker *shrinker); void shrinker_free(struct shrinker *shrinker); +static inline bool shrinker_try_get(struct shrinker *shrinker) +{ + return refcount_inc_not_zero(&shrinker->refcount); +} + +static inline void shrinker_put(struct shrinker *shrinker) +{ + if (refcount_dec_and_test(&shrinker->refcount)) + complete(&shrinker->done); +} + #ifdef CONFIG_SHRINKER_DEBUG extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); diff --git a/mm/shrinker.c b/mm/shrinker.c index e2e832b15803..9bb2e61092b3 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "internal.h" @@ -577,33 +578,50 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) return shrink_slab_memcg(gfp_mask, nid, memcg, priority); - if (!down_read_trylock(&shrinker_rwsem)) - goto out; - - list_for_each_entry(shrinker, &shrinker_list, list) { + /* + * lockless algorithm of global shrink. + * + * In the unregistration setp, the shrinker will be freed asynchronously + * via RCU after its refcount reaches 0. So both rcu_read_lock() and + * shrinker_try_get() can be used to ensure the existence of the shrinker. + * + * So in the global shrink: + * step 1: use rcu_read_lock() to guarantee existence of the shrinker + * and the validity of the shrinker_list walk. + * step 2: use shrinker_try_get() to try get the refcount, if successful, + * then the existence of the shrinker can also be guaranteed, + * so we can release the RCU lock to do do_shrink_slab() that + * may sleep. 
+ * step 3: *MUST* to reacquire the RCU lock before calling shrinker_put(), + * which ensures that neither this shrinker nor the next shrinker + * will be freed in the next traversal operation. + * step 4: do shrinker_put() paired with step 2 to put the refcount, + * if the refcount reaches 0, then wake up the waiter in + * shrinker_free() by calling complete(). + */ + rcu_read_lock(); + list_for_each_entry_rcu(shrinker, &shrinker_list, list) { struct shrink_control sc = { .gfp_mask = gfp_mask, .nid = nid, .memcg = memcg, }; + if (!shrinker_try_get(shrinker)) + continue; + + rcu_read_unlock(); + ret = do_shrink_slab(&sc, shrinker, priority); if (ret == SHRINK_EMPTY) ret = 0; freed += ret; - /* - * Bail out if someone want to register a new shrinker to - * prevent the registration from being stalled for long periods - * by parallel ongoing shrinking. - */ - if (rwsem_is_contended(&shrinker_rwsem)) { - freed = freed ? : 1; - break; - } + + rcu_read_lock(); + shrinker_put(shrinker); } - up_read(&shrinker_rwsem); -out: + rcu_read_unlock(); cond_resched(); return freed; } @@ -676,13 +694,29 @@ void shrinker_register(struct shrinker *shrinker) } down_write(&shrinker_rwsem); - list_add_tail(&shrinker->list, &shrinker_list); + list_add_tail_rcu(&shrinker->list, &shrinker_list); shrinker->flags |= SHRINKER_REGISTERED; shrinker_debugfs_add(shrinker); up_write(&shrinker_rwsem); + + init_completion(&shrinker->done); + /* + * Now the shrinker is fully set up, take the first reference to it to + * indicate that lookup operations are now allowed to use it via + * shrinker_try_get(). 
+ */ + refcount_set(&shrinker->refcount, 1); } EXPORT_SYMBOL_GPL(shrinker_register); +static void shrinker_free_rcu_cb(struct rcu_head *head) +{ + struct shrinker *shrinker = container_of(head, struct shrinker, rcu); + + kfree(shrinker->nr_deferred); + kfree(shrinker); +} + void shrinker_free(struct shrinker *shrinker) { struct dentry *debugfs_entry = NULL; @@ -691,9 +725,25 @@ void shrinker_free(struct shrinker *shrinker) if (!shrinker) return; + if (shrinker->flags & SHRINKER_REGISTERED) { + /* drop the initial refcount */ + shrinker_put(shrinker); + /* + * Wait for all lookups of the shrinker to complete, after that, + * no shrinker is running or will run again, then we can safely + * free it asynchronously via RCU and safely free the structure + * where the shrinker is located, such as super_block etc. + */ + wait_for_completion(&shrinker->done); + } + down_write(&shrinker_rwsem); if (shrinker->flags & SHRINKER_REGISTERED) { - list_del(&shrinker->list); + /* + * Now we can safely remove it from the shrinker_list and then + * free it. + */ + list_del_rcu(&shrinker->list); debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); shrinker->flags &= ~SHRINKER_REGISTERED; } @@ -707,9 +757,6 @@ void shrinker_free(struct shrinker *shrinker) if (debugfs_entry) shrinker_debugfs_remove(debugfs_entry, debugfs_id); - kfree(shrinker->nr_deferred); - shrinker->nr_deferred = NULL; - - kfree(shrinker); + call_rcu(&shrinker->rcu, shrinker_free_rcu_cb); } EXPORT_SYMBOL_GPL(shrinker_free); -- cgit v1.2.3 From 09c550508a4b8f7844b197cc16877dd0f7c42d8f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 13 Sep 2023 14:51:13 +0200 Subject: mm/rmap: pass folio to hugepage_add_anon_rmap() Let's pass a folio; we are always mapping the entire thing. 
Link: https://lkml.kernel.org/r/20230913125113.313322-7-david@redhat.com Signed-off-by: David Hildenbrand Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/rmap.h | 2 +- mm/migrate.c | 2 +- mm/rmap.c | 8 +++----- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 51cc21ebb568..d22f4d21a11c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -203,7 +203,7 @@ void folio_add_file_rmap_range(struct folio *, struct page *, unsigned int nr, void page_remove_rmap(struct page *, struct vm_area_struct *, bool compound); -void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, +void hugepage_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); diff --git a/mm/migrate.c b/mm/migrate.c index 2053b54556ca..eb6bc4053bc4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -249,7 +249,7 @@ static bool remove_migration_pte(struct folio *folio, pte = arch_make_huge_pte(pte, shift, vma->vm_flags); if (folio_test_anon(folio)) - hugepage_add_anon_rmap(new, vma, pvmw.address, + hugepage_add_anon_rmap(folio, vma, pvmw.address, rmap_flags); else page_dup_file_rmap(new, true); diff --git a/mm/rmap.c b/mm/rmap.c index ed4b602bcbd5..d24e2c36372e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2542,18 +2542,16 @@ void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc) * * RMAP_COMPOUND is ignored. 
*/ -void hugepage_add_anon_rmap(struct page *page, struct vm_area_struct *vma, +void hugepage_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma, unsigned long address, rmap_t flags) { - struct folio *folio = page_folio(page); - VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); atomic_inc(&folio->_entire_mapcount); if (flags & RMAP_EXCLUSIVE) - SetPageAnonExclusive(page); + SetPageAnonExclusive(&folio->page); VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 && - PageAnonExclusive(page), folio); + PageAnonExclusive(&folio->page), folio); } void hugepage_add_new_anon_rmap(struct folio *folio, -- cgit v1.2.3 From 73eab3ca481e5be0f1fd8140365d604482f84ee1 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 13 Sep 2023 17:51:27 +0800 Subject: mm: migrate: convert migrate_misplaced_page() to migrate_misplaced_folio() At present, numa balance only support base page and PMD-mapped THP, but we will expand to support to migrate large folio/pte-mapped THP in the future, it is better to make migrate_misplaced_page() to take a folio instead of a page, and rename it to migrate_misplaced_folio(), it is a preparation, also this remove several compound_head() calls. 
Link: https://lkml.kernel.org/r/20230913095131.2426871-5-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Zi Yan Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Signed-off-by: Andrew Morton --- include/linux/migrate.h | 4 ++-- mm/huge_memory.c | 2 +- mm/memory.c | 2 +- mm/migrate.c | 39 +++++++++++++++++++++------------------ 4 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 711dd9412561..2ce13e8a309b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -142,10 +142,10 @@ const struct movable_operations *page_movable_ops(struct page *page) } #ifdef CONFIG_NUMA_BALANCING -int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, +int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node); #else -static inline int migrate_misplaced_page(struct page *page, +static inline int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node) { return -EAGAIN; /* can't migrate now */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1cfd83e91748..5c20e43782e4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1567,7 +1567,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) spin_unlock(vmf->ptl); writable = false; - migrated = migrate_misplaced_page(page, vma, target_nid); + migrated = migrate_misplaced_folio(page_folio(page), vma, target_nid); if (migrated) { flags |= TNF_MIGRATED; page_nid = target_nid; diff --git a/mm/memory.c b/mm/memory.c index 0739ccb00e61..d956b231e835 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4812,7 +4812,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) writable = false; /* Migrate to the requested node */ - if (migrate_misplaced_page(page, vma, target_nid)) { + if (migrate_misplaced_folio(page_folio(page), vma, target_nid)) { page_nid = target_nid; flags |= 
TNF_MIGRATED; } else { diff --git a/mm/migrate.c b/mm/migrate.c index 1f1aebe8da18..1b848f6b5fbc 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2516,55 +2516,58 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio) } /* - * Attempt to migrate a misplaced page to the specified destination + * Attempt to migrate a misplaced folio to the specified destination * node. Caller is expected to have an elevated reference count on - * the page that will be dropped by this function before returning. + * the folio that will be dropped by this function before returning. */ -int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, - int node) +int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, + int node) { pg_data_t *pgdat = NODE_DATA(node); int isolated; int nr_remaining; unsigned int nr_succeeded; LIST_HEAD(migratepages); - int nr_pages = thp_nr_pages(page); + int nr_pages = folio_nr_pages(folio); /* - * Don't migrate file pages that are mapped in multiple processes + * Don't migrate file folios that are mapped in multiple processes * with execute permissions as they are probably shared libraries. + * To check if the folio is shared, ideally we want to make sure + * every page is mapped to the same process. Doing that is very + * expensive, so check the estimated mapcount of the folio instead. */ - if (page_mapcount(page) != 1 && page_is_file_lru(page) && + if (folio_estimated_sharers(folio) != 1 && folio_is_file_lru(folio) && (vma->vm_flags & VM_EXEC)) goto out; /* - * Also do not migrate dirty pages as not all filesystems can move - * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles. + * Also do not migrate dirty folios as not all filesystems can move + * dirty folios in MIGRATE_ASYNC mode which is a waste of cycles. 
*/ - if (page_is_file_lru(page) && PageDirty(page)) + if (folio_is_file_lru(folio) && folio_test_dirty(folio)) goto out; - isolated = numamigrate_isolate_folio(pgdat, page_folio(page)); + isolated = numamigrate_isolate_folio(pgdat, folio); if (!isolated) goto out; - list_add(&page->lru, &migratepages); + list_add(&folio->lru, &migratepages); nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_folio, NULL, node, MIGRATE_ASYNC, MR_NUMA_MISPLACED, &nr_succeeded); if (nr_remaining) { if (!list_empty(&migratepages)) { - list_del(&page->lru); - mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + - page_is_file_lru(page), -nr_pages); - putback_lru_page(page); + list_del(&folio->lru); + node_stat_mod_folio(folio, NR_ISOLATED_ANON + + folio_is_file_lru(folio), -nr_pages); + folio_putback_lru(folio); } isolated = 0; } if (nr_succeeded) { count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded); - if (!node_is_toptier(page_to_nid(page)) && node_is_toptier(node)) + if (!node_is_toptier(folio_nid(folio)) && node_is_toptier(node)) mod_node_page_state(pgdat, PGPROMOTE_SUCCESS, nr_succeeded); } @@ -2572,7 +2575,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, return isolated; out: - put_page(page); + folio_put(folio); return 0; } #endif /* CONFIG_NUMA_BALANCING */ -- cgit v1.2.3 From 2a41815784e029cbef571511384f00fa40f2a82e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:04 +0100 Subject: buffer: pass GFP flags to folio_alloc_buffers() Patch series "Add and use bdev_getblk()", v2. This patch series fixes a bug reported by Hui Zhu; see proposed patches v1 and v2: https://lore.kernel.org/linux-fsdevel/20230811035705.3296-1-teawaterz@linux.alibaba.com/ https://lore.kernel.org/linux-fsdevel/20230811071519.1094-1-teawaterz@linux.alibaba.com/ I decided to go in a rather different direction for this fix, and fix a related problem at the same time. 
I don't think there's any urgency to rush this into Linus' tree, nor have I marked it for stable. Reasonable people may disagree. This patch (of 8): Instead of creating entirely new flags, inherit them from grow_dev_page(). The other callers create the same flags that this function used to create. Link: https://lkml.kernel.org/r/20230914150011.843330-1-willy@infradead.org Link: https://lkml.kernel.org/r/20230914150011.843330-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- fs/buffer.c | 17 +++++++++-------- include/linux/buffer_head.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 12e9a71c693d..32338b7cfeb9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -915,16 +915,12 @@ int remove_inode_buffers(struct inode *inode) * which may not fail from ordinary buffer allocations. */ struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, - bool retry) + gfp_t gfp) { struct buffer_head *bh, *head; - gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; long offset; struct mem_cgroup *memcg, *old_memcg; - if (retry) - gfp |= __GFP_NOFAIL; - /* The folio lock pins the memcg */ memcg = folio_memcg(folio); old_memcg = set_active_memcg(memcg); @@ -967,7 +963,11 @@ EXPORT_SYMBOL_GPL(folio_alloc_buffers); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry) { - return folio_alloc_buffers(page_folio(page), size, retry); + gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; + if (retry) + gfp |= __GFP_NOFAIL; + + return folio_alloc_buffers(page_folio(page), size, gfp); } EXPORT_SYMBOL_GPL(alloc_page_buffers); @@ -1069,7 +1069,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, goto failed; } - bh = folio_alloc_buffers(folio, size, true); + bh = folio_alloc_buffers(folio, size, gfp_mask | __GFP_ACCOUNT); /* * Link the folio to the buffers and initialise them. 
Take the @@ -1644,8 +1644,9 @@ void folio_create_empty_buffers(struct folio *folio, unsigned long blocksize, unsigned long b_state) { struct buffer_head *bh, *head, *tail; + gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT | __GFP_NOFAIL; - head = folio_alloc_buffers(folio, blocksize, true); + head = folio_alloc_buffers(folio, blocksize, gfp); bh = head; do { bh->b_state |= b_state; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 44e9de51eedf..67d94d2be475 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -198,7 +198,7 @@ void touch_buffer(struct buffer_head *bh); void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset); struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, - bool retry); + gfp_t gfp); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, bool retry); void create_empty_buffers(struct page *, unsigned long, -- cgit v1.2.3 From 3ed65f04aac4d1cd025f30ee3fac174bcbf2b018 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:05 +0100 Subject: buffer: hoist GFP flags from grow_dev_page() to __getblk_gfp() grow_dev_page() is only called by grow_buffers(). grow_buffers() is only called by __getblk_slow() and __getblk_slow() is only called from __getblk_gfp(), so it is safe to move the GFP flags setting all the way up. With that done, add a new bdev_getblk() entry point that leaves the GFP flags the way the caller specified them. 
[willy@infradead.org: fix grow_dev_page() error handling] Link: https://lkml.kernel.org/r/ZRREEIwqiy5DijKB@casper.infradead.org Link: https://lkml.kernel.org/r/20230914150011.843330-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Cc: Dan Carpenter Signed-off-by: Andrew Morton --- fs/buffer.c | 61 +++++++++++++++++++++++++++++---------------- include/linux/buffer_head.h | 2 ++ 2 files changed, 42 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 32338b7cfeb9..80e96c1fcd33 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1043,20 +1043,11 @@ grow_dev_page(struct block_device *bdev, sector_t block, struct buffer_head *bh; sector_t end_block; int ret = 0; - gfp_t gfp_mask; - - gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; - - /* - * XXX: __getblk_slow() can not really deal with failure and - * will endlessly loop on improvised global reclaim. Prefer - * looping in the allocator rather than here, at least that - * code knows what it's doing. - */ - gfp_mask |= __GFP_NOFAIL; folio = __filemap_get_folio(inode->i_mapping, index, - FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask); + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); + if (IS_ERR(folio)) + return PTR_ERR(folio); bh = folio_buffers(folio); if (bh) { @@ -1069,7 +1060,10 @@ grow_dev_page(struct block_device *bdev, sector_t block, goto failed; } - bh = folio_alloc_buffers(folio, size, gfp_mask | __GFP_ACCOUNT); + ret = -ENOMEM; + bh = folio_alloc_buffers(folio, size, gfp | __GFP_ACCOUNT); + if (!bh) + goto failed; /* * Link the folio to the buffers and initialise them. Take the @@ -1420,24 +1414,49 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) } EXPORT_SYMBOL(__find_get_block); +/** + * bdev_getblk - Get a buffer_head in a block device's buffer cache. + * @bdev: The block device. + * @block: The block number. + * @size: The size of buffer_heads for this @bdev. 
+ * @gfp: The memory allocation flags to use. + * + * In contrast to __getblk_gfp(), the @gfp flags must be all of the flags; + * they are not augmented with the mapping's GFP flags. + * + * Return: The buffer head, or NULL if memory could not be allocated. + */ +struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) +{ + struct buffer_head *bh = __find_get_block(bdev, block, size); + + might_alloc(gfp); + if (bh) + return bh; + + return __getblk_slow(bdev, block, size, gfp); +} +EXPORT_SYMBOL(bdev_getblk); + /* * __getblk_gfp() will locate (and, if necessary, create) the buffer_head * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. - * - * __getblk_gfp() will lock up the machine if grow_dev_page's - * try_to_free_buffers() attempt is failing. FIXME, perhaps? */ struct buffer_head * __getblk_gfp(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { - struct buffer_head *bh = __find_get_block(bdev, block, size); + gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); - might_sleep(); - if (bh == NULL) - bh = __getblk_slow(bdev, block, size, gfp); - return bh; + /* + * Prefer looping in the allocator rather than here, at least that + * code knows what it's doing. 
+ */ + gfp |= __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); } EXPORT_SYMBOL(__getblk_gfp); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 67d94d2be475..7825bb3d63a7 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -227,6 +227,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); -- cgit v1.2.3 From c645e65c0675dd4df7ee68b995154dc1c1e7ce3b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:08 +0100 Subject: buffer: convert getblk_unmovable() and __getblk() to use bdev_getblk() Move these two functions up in the file for the benefit of the next patch, and pass in all of the GFP flags to use instead of the partial GFP flags used by __getblk_gfp(). 
Link: https://lkml.kernel.org/r/20230914150011.843330-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 7825bb3d63a7..9a3ca5f6d63d 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -340,6 +340,28 @@ sb_breadahead(struct super_block *sb, sector_t block) __breadahead(sb->s_bdev, block, sb->s_blocksize); } +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, unsigned size) +{ + gfp_t gfp; + + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); +} + +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, unsigned size) +{ + gfp_t gfp; + + gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= __GFP_MOVABLE | __GFP_NOFAIL; + + return bdev_getblk(bdev, block, size, gfp); +} + static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { @@ -387,20 +409,6 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } -static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, 0); -} - -static inline struct buffer_head *__getblk(struct block_device *bdev, - sector_t block, - unsigned size) -{ - return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); -} - static inline void bh_readahead(struct buffer_head *bh, blk_opf_t op_flags) { if (!buffer_uptodate(bh) && trylock_buffer(bh)) { -- cgit v1.2.3 From 4b9c8b1919323f7f359376ca31a4c721cb2b3acf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:09 +0100 Subject: buffer: 
convert sb_getblk() to call __getblk() Now that __getblk() is in the right place in the file, it is trivial to call it from sb_getblk(). Link: https://lkml.kernel.org/r/20230914150011.843330-7-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9a3ca5f6d63d..b294e2cccbae 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -362,13 +362,12 @@ static inline struct buffer_head *__getblk(struct block_device *bdev, return bdev_getblk(bdev, block, size, gfp); } -static inline struct buffer_head * -sb_getblk(struct super_block *sb, sector_t block) +static inline struct buffer_head *sb_getblk(struct super_block *sb, + sector_t block) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); + return __getblk(sb->s_bdev, block, sb->s_blocksize); } - static inline struct buffer_head * sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) { -- cgit v1.2.3 From 8a83ac54940d27b8f56d766e1cb270d150fedd50 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:10 +0100 Subject: ext4: call bdev_getblk() from sb_getblk_gfp() Most of the callers of sb_getblk_gfp() already assumed that they were passing the entire GFP flags to use. Fix up the two callers that didn't, and remove the __GFP_NOFAIL from them since they both appear to correctly handle failure. 
Link: https://lkml.kernel.org/r/20230914150011.843330-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- fs/ext4/super.c | 10 ++++++++-- include/linux/buffer_head.h | 6 +++--- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 268d812b0add..c00ec159dea5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -244,13 +244,19 @@ static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb, struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block, blk_opf_t op_flags) { - return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE); + gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping, + ~__GFP_FS) | __GFP_MOVABLE; + + return __ext4_sb_bread_gfp(sb, block, op_flags, gfp); } struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, sector_t block) { - return __ext4_sb_bread_gfp(sb, block, 0, 0); + gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping, + ~__GFP_FS); + + return __ext4_sb_bread_gfp(sb, block, 0, gfp); } void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index b294e2cccbae..22f13eece719 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -368,10 +368,10 @@ static inline struct buffer_head *sb_getblk(struct super_block *sb, return __getblk(sb->s_bdev, block, sb->s_blocksize); } -static inline struct buffer_head * -sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +static inline struct buffer_head *sb_getblk_gfp(struct super_block *sb, + sector_t block, gfp_t gfp) { - return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); + return bdev_getblk(sb->s_bdev, block, sb->s_blocksize, gfp); } static inline struct buffer_head * -- cgit v1.2.3 From 93b13ecaa713e1fcbf23b7483eec065d300c5ad8 Mon Sep 17 00:00:00 2001 From: "Matthew 
Wilcox (Oracle)" Date: Thu, 14 Sep 2023 16:00:11 +0100 Subject: buffer: remove __getblk_gfp() Inline it into __bread_gfp(). Link: https://lkml.kernel.org/r/20230914150011.843330-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hui Zhu Signed-off-by: Andrew Morton --- fs/buffer.c | 36 +++++++++++------------------------- include/linux/buffer_head.h | 2 -- 2 files changed, 11 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index edd118594565..edec8652788c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1421,9 +1421,6 @@ EXPORT_SYMBOL(__find_get_block); * @size: The size of buffer_heads for this @bdev. * @gfp: The memory allocation flags to use. * - * In contrast to __getblk_gfp(), the @gfp flags must be all of the flags; - * they are not augmented with the mapping's GFP flags. - * * Return: The buffer head, or NULL if memory could not be allocated. */ struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, @@ -1439,27 +1436,6 @@ struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, } EXPORT_SYMBOL(bdev_getblk); -/* - * __getblk_gfp() will locate (and, if necessary, create) the buffer_head - * which corresponds to the passed block_device, block and size. The - * returned buffer has its reference count incremented. - */ -struct buffer_head * -__getblk_gfp(struct block_device *bdev, sector_t block, - unsigned size, gfp_t gfp) -{ - gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); - - /* - * Prefer looping in the allocator rather than here, at least that - * code knows what it's doing. - */ - gfp |= __GFP_NOFAIL; - - return bdev_getblk(bdev, block, size, gfp); -} -EXPORT_SYMBOL(__getblk_gfp); - /* * Do async read-ahead on a buffer.. 
*/ @@ -1491,7 +1467,17 @@ struct buffer_head * __bread_gfp(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { - struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); + struct buffer_head *bh; + + gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + + /* + * Prefer looping in the allocator rather than here, at least that + * code knows what it's doing. + */ + gfp |= __GFP_NOFAIL; + + bh = bdev_getblk(bdev, block, size, gfp); if (likely(bh) && !buffer_uptodate(bh)) bh = __bread_slow(bh); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 22f13eece719..3dc4720e4773 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -229,8 +229,6 @@ struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); -struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, - unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -- cgit v1.2.3 From 3dfbb555c98ac55b9d911f9af0e35014b445fb41 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 14 Sep 2023 15:16:39 +0200 Subject: mm, vmscan: remove ISOLATE_UNMAPPED This isolate_mode_t flag is effectively unused since 89f6c88a6ab4 ("mm: __isolate_lru_page_prepare() in isolate_migratepages_block()") as sc->may_unmap is now checked directly (and only node_reclaim has a mode that sets it to 0). The last remaining place is mm_vmscan_lru_isolate tracepoint for the isolate_mode parameter. That one was mainly used to indicate the active/inactive mode, which the trace-vmscan-postprocess.pl script consumed, but that got silently broken. After fixing the script by the previous patch, it does not need the isolate_mode anymore. 
So just remove the parameter and with that the whole ISOLATE_UNMAPPED flag. Link: https://lkml.kernel.org/r/20230914131637.12204-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Cc: Hugh Dickins Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 8 ++++---- include/linux/mmzone.h | 2 -- include/trace/events/vmscan.h | 8 ++------ mm/vmscan.c | 3 +-- 4 files changed, 7 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 725d41a8d4ef..048dc0dbce64 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -112,7 +112,7 @@ my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; my $regex_wakeup_kswapd_default = 'nid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)'; -my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; +my $regex_lru_isolate_default = 'classzone=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate_anon=([0-9]*) nr_activate_file=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_taken=([0-9]*) nr_active=([0-9]*) nr_deactivated=([0-9]*) nr_referenced=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)' ; my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) 
flags=([A-Z_|]*)'; @@ -204,7 +204,7 @@ $regex_wakeup_kswapd = generate_traceevent_regex( $regex_lru_isolate = generate_traceevent_regex( "vmscan/mm_vmscan_lru_isolate", $regex_lru_isolate_default, - "isolate_mode", classzone", "order", + "classzone", "order", "nr_requested", "nr_scanned", "nr_skipped", "nr_taken", "lru"); $regex_lru_shrink_inactive = generate_traceevent_regex( @@ -379,8 +379,8 @@ EVENT_PROCESS: print " $regex_lru_isolate/o\n"; next; } - my $nr_scanned = $5; - my $lru = $8; + my $nr_scanned = $4; + my $lru = $7; # To closer match vmstat scanning statistics, only count # inactive lru as scanning diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4106fbc5b4b3..486587fcd27f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -639,8 +639,6 @@ struct lruvec { #endif }; -/* Isolate unmapped pages */ -#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* Isolate unevictable pages */ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index d2123dd960d5..1a488c30afa5 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -285,10 +285,9 @@ TRACE_EVENT(mm_vmscan_lru_isolate, unsigned long nr_scanned, unsigned long nr_skipped, unsigned long nr_taken, - isolate_mode_t isolate_mode, int lru), - TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru), + TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, lru), TP_STRUCT__entry( __field(int, highest_zoneidx) @@ -297,7 +296,6 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __field(unsigned long, nr_scanned) __field(unsigned long, nr_skipped) __field(unsigned long, nr_taken) - __field(unsigned int, isolate_mode) __field(int, lru) ), @@ -308,7 +306,6 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __entry->nr_scanned = nr_scanned; __entry->nr_skipped = nr_skipped; __entry->nr_taken = 
nr_taken; - __entry->isolate_mode = (__force unsigned int)isolate_mode; __entry->lru = lru; ), @@ -316,8 +313,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, * classzone is previous name of the highest_zoneidx. * Reason not to change it is the ABI requirement of the tracepoint. */ - TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", - __entry->isolate_mode, + TP_printk("classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", __entry->highest_zoneidx, __entry->order, __entry->nr_requested, diff --git a/mm/vmscan.c b/mm/vmscan.c index acf115468bf8..3df0e2a59052 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1689,8 +1689,7 @@ move: } *nr_scanned = total_scan; trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, - total_scan, skipped, nr_taken, - sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru); + total_scan, skipped, nr_taken, lru); update_lru_sizes(lruvec, lru, nr_zone_taken); return nr_taken; } -- cgit v1.2.3 From 4472edf63d6630e6cf65e205b4fc8c3c94d0afe5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 14 Sep 2023 02:15:23 +0000 Subject: mm/damon/core: use number of passed access sampling as a timer DAMON sleeps for sampling interval after each sampling, and check if the aggregation interval and the ops update interval have passed using ktime_get_coarse_ts64() and baseline timestamps for the intervals. That design is for making the operations occur at deterministic timing regardless of the time that spend for each work. However, it turned out it is not that useful, and incur not-that-intuitive results. After all, timer functions, and especially sleep functions that DAMON uses to wait for specific timing, are not necessarily strictly accurate. It is legal design, so no problem. However, depending on such inaccuracies, the nr_accesses can be larger than aggregation interval divided by sampling interval. 
For example, with the default setting (5 ms sampling interval and 100 ms aggregation interval) we frequently see regions having nr_accesses larger than 20. Also, if the execution of a DAMOS scheme takes a long time, the next aggregation could happen before a sufficient number of samples is collected. This is not what usual users would intuitively expect. Since access check sampling is the smallest unit work of DAMON, using the number of passed sampling intervals as the DAMON-internal timer can easily avoid these problems. That is, convert aggregation and ops update intervals to numbers of sampling intervals that need to be passed before those operations are executed, count the number of passed sampling intervals, and invoke the operations as soon as the specific amount of sampling intervals has passed. Make the change. Note that this could make a behavioral change to settings that use intervals that are not aligned to the sampling interval. For example, if the sampling interval is 5 ms and the aggregation interval is 12 ms, DAMON effectively uses 15 ms as its aggregation interval, because it checks whether the aggregation interval has passed after sleeping for the sampling interval. This change will make DAMON effectively use 10 ms as the aggregation interval, since it uses 'aggregation interval / sampling interval * sampling interval' as the effective aggregation interval, and we don't use floating point types. Usual users would have used aligned intervals, so this behavioral change is not expected to make any meaningful impact, so just make this change.
Link: https://lkml.kernel.org/r/20230914021523.60649-1-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 14 ++++++-- mm/damon/core.c | 96 +++++++++++++++++++++++++-------------------------- 2 files changed, 59 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index ab3089de1478..9a32b8fd0bd3 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -524,8 +524,18 @@ struct damon_ctx { struct damon_attrs attrs; /* private: internal use only */ - struct timespec64 last_aggregation; - struct timespec64 last_ops_update; + /* number of sample intervals that passed since this context started */ + unsigned long passed_sample_intervals; + /* + * number of sample intervals that should be passed before next + * aggregation + */ + unsigned long next_aggregation_sis; + /* + * number of sample intervals that should be passed before next ops + * update + */ + unsigned long next_ops_update_sis; /* public: */ struct task_struct *kdamond; diff --git a/mm/damon/core.c b/mm/damon/core.c index 3ca34a252a3c..c5b7296c69a0 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -427,8 +427,10 @@ struct damon_ctx *damon_new_ctx(void) ctx->attrs.aggr_interval = 100 * 1000; ctx->attrs.ops_update_interval = 60 * 1000 * 1000; - ktime_get_coarse_ts64(&ctx->last_aggregation); - ctx->last_ops_update = ctx->last_aggregation; + ctx->passed_sample_intervals = 0; + /* These will be set from kdamond_init_intervals_sis() */ + ctx->next_aggregation_sis = 0; + ctx->next_ops_update_sis = 0; mutex_init(&ctx->kdamond_lock); @@ -552,6 +554,9 @@ static void damon_update_monitoring_results(struct damon_ctx *ctx, */ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) { + unsigned long sample_interval = attrs->sample_interval ? 
+ attrs->sample_interval : 1; + if (attrs->min_nr_regions < 3) return -EINVAL; if (attrs->min_nr_regions > attrs->max_nr_regions) @@ -559,6 +564,11 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) if (attrs->sample_interval > attrs->aggr_interval) return -EINVAL; + ctx->next_aggregation_sis = ctx->passed_sample_intervals + + attrs->aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->passed_sample_intervals + + attrs->ops_update_interval / sample_interval; + damon_update_monitoring_results(ctx, attrs); ctx->attrs = *attrs; return 0; @@ -732,38 +742,6 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs) return err; } -/* - * damon_check_reset_time_interval() - Check if a time interval is elapsed. - * @baseline: the time to check whether the interval has elapsed since - * @interval: the time interval (microseconds) - * - * See whether the given time interval has passed since the given baseline - * time. If so, it also updates the baseline to current time for next check. - * - * Return: true if the time interval has passed, or false otherwise. - */ -static bool damon_check_reset_time_interval(struct timespec64 *baseline, - unsigned long interval) -{ - struct timespec64 now; - - ktime_get_coarse_ts64(&now); - if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) < - interval * 1000) - return false; - *baseline = now; - return true; -} - -/* - * Check whether it is time to flush the aggregated information - */ -static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx) -{ - return damon_check_reset_time_interval(&ctx->last_aggregation, - ctx->attrs.aggr_interval); -} - /* * Reset the aggregated monitoring results ('nr_accesses' of each region). */ @@ -1274,18 +1252,6 @@ static void kdamond_split_regions(struct damon_ctx *ctx) last_nr_regions = nr_regions; } -/* - * Check whether it is time to check and apply the operations-related data - * structures. - * - * Returns true if it is. 
- */ -static bool kdamond_need_update_operations(struct damon_ctx *ctx) -{ - return damon_check_reset_time_interval(&ctx->last_ops_update, - ctx->attrs.ops_update_interval); -} - /* * Check whether current monitoring should be stopped * @@ -1397,6 +1363,17 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) return -EBUSY; } +static void kdamond_init_intervals_sis(struct damon_ctx *ctx) +{ + unsigned long sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + + ctx->passed_sample_intervals = 0; + ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->attrs.ops_update_interval / + sample_interval; +} + /* * The monitoring daemon that runs as a kernel thread */ @@ -1410,6 +1387,8 @@ static int kdamond_fn(void *data) pr_debug("kdamond (%d) starts\n", current->pid); + kdamond_init_intervals_sis(ctx); + if (ctx->ops.init) ctx->ops.init(ctx); if (ctx->callback.before_start && ctx->callback.before_start(ctx)) @@ -1418,6 +1397,17 @@ static int kdamond_fn(void *data) sz_limit = damon_region_sz_limit(ctx); while (!kdamond_need_stop(ctx)) { + /* + * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could + * be changed from after_wmarks_check() or after_aggregation() + * callbacks. Read the values here, and use those for this + * iteration. That is, damon_set_attrs() updated new values + * are respected from next iteration. 
+ */ + unsigned long next_aggregation_sis = ctx->next_aggregation_sis; + unsigned long next_ops_update_sis = ctx->next_ops_update_sis; + unsigned long sample_interval = ctx->attrs.sample_interval; + if (kdamond_wait_activation(ctx)) break; @@ -1427,12 +1417,17 @@ static int kdamond_fn(void *data) ctx->callback.after_sampling(ctx)) break; - kdamond_usleep(ctx->attrs.sample_interval); + kdamond_usleep(sample_interval); + ctx->passed_sample_intervals++; if (ctx->ops.check_accesses) max_nr_accesses = ctx->ops.check_accesses(ctx); - if (kdamond_aggregate_interval_passed(ctx)) { + sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + if (ctx->passed_sample_intervals == next_aggregation_sis) { + ctx->next_aggregation_sis = next_aggregation_sis + + ctx->attrs.aggr_interval / sample_interval; kdamond_merge_regions(ctx, max_nr_accesses / 10, sz_limit); @@ -1447,7 +1442,10 @@ static int kdamond_fn(void *data) ctx->ops.reset_aggregated(ctx); } - if (kdamond_need_update_operations(ctx)) { + if (ctx->passed_sample_intervals == next_ops_update_sis) { + ctx->next_ops_update_sis = next_ops_update_sis + + ctx->attrs.ops_update_interval / + sample_interval; if (ctx->ops.update) ctx->ops.update(ctx); sz_limit = damon_region_sz_limit(ctx); -- cgit v1.2.3 From 78fbfb155d204428119310d1b9df665ab88da6e8 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:44 +0000 Subject: mm/damon/core: define and use a dedicated function for region access rate update Patch series "mm/damon: provide pseudo-moving sum based access rate". DAMON checks the access to each region for every sampling interval, increase the access rate counter of the region, namely nr_accesses, if the access was made. For every aggregation interval, the counter is reset. The counter is exposed to users to be used as a metric showing the relative access rate (frequency) of each region. In other words, DAMON provides access rate of each region in every aggregation interval. 
The aggregation avoids temporal access pattern changes making things confusing. However, this also makes a few DAMON-related operations unnecessarily need to be aligned to the aggregation interval. This can restrict the flexibility of DAMON applications, especially when the aggregation interval is huge. To provide the monitoring results in finer-grained timing while keeping handling of temporal access pattern change, this patchset implements a pseudo-moving sum based access rate metric. It is a pseudo-moving sum because a strict moving sum implementation would need to keep all values for the last time window, and that could incur high overhead if there could be an arbitrary number of values in a time window. Especially in case of the nr_accesses, since the sampling interval and aggregation interval can be arbitrarily set and the past values should be maintained for every region, it could be risky. The pseudo-moving sum assumes there was no temporal access pattern change in the last discrete time window to remove the need for keeping the list of the last time window values. As a result, it becomes not a strict moving sum implementation, but provides a reasonable accuracy. Also, it keeps an important property of the moving sum. That is, the moving sum becomes the same as the discrete-window based sum at the time that aligns to the time window. This means using the pseudo moving sum based nr_accesses makes no change to users who read the value for every aggregation interval. Patches Sequence ---------------- The sequence of the patches is as follows. The first four patches are for preparation of the change. The first two (patches 1 and 2) implement a helper function for nr_accesses update and eliminate a corner case that skips use of the function, respectively. The following two (patches 3 and 4) respectively implement the pseudo-moving sum function and its simple unit test case. Two patches for making DAMON use the pseudo-moving sum follow.
The fifth one (patch 5) introduces a new field for representing the pseudo-moving sum-based access rate of each region, and the sixth one makes the new representation actually updated with the pseudo-moving sum function. Last two patches (patches 7 and 8) make followup fixes for skipping unnecessary updates and marking the moving sum function as static, respectively. This patch (of 8): Each DAMON operations set is updating nr_accesses field of each damon_region for each of their access check results, from the check_accesses() callback. Directly accessing the field could make things complex to manage and change in future. Define and use a dedicated function for the purpose. Link: https://lkml.kernel.org/r/20230915025251.72816-1-sj@kernel.org Link: https://lkml.kernel.org/r/20230915025251.72816-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 5 ++++- mm/damon/core.c | 16 ++++++++++++++++ mm/damon/paddr.c | 6 ++---- mm/damon/vaddr.c | 6 ++---- 4 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 9a32b8fd0bd3..17c504d236b9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -45,7 +45,9 @@ struct damon_addr_range { * * @nr_accesses is reset to zero for every &damon_attrs->aggr_interval and be * increased for every &damon_attrs->sample_interval if an access to the region - * during the last sampling interval is found. + * during the last sampling interval is found. The update of this field should + * not be done with direct access but with the helper function, + * damon_update_region_access_rate(). * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed.
If two @@ -620,6 +622,7 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +void damon_update_region_access_rate(struct damon_region *r, bool accessed); struct damos_filter *damos_new_filter(enum damos_filter_type type, bool matching); diff --git a/mm/damon/core.c b/mm/damon/core.c index c5b7296c69a0..10532159323a 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1549,6 +1549,22 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t, return damon_set_regions(t, &addr_range, 1); } +/** + * damon_update_region_access_rate() - Update the access rate of a region. + * @r: The DAMON region to update for its access check result. + * @accessed: Whether the region has accessed during last sampling interval. + * + * Update the access rate of a region with the region's last sampling interval + * access check result. + * + * Usually this will be called by &damon_operations->check_accesses callback. 
+ */ +void damon_update_region_access_rate(struct damon_region *r, bool accessed) +{ + if (accessed) + r->nr_accesses++; +} + static int __init damon_init(void) { damon_region_cache = KMEM_CACHE(damon_region, 0); diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 909db25efb35..44f21860b555 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -157,14 +157,12 @@ static void __damon_pa_check_access(struct damon_region *r) /* If the region is in the last checked page, reuse the result */ if (ALIGN_DOWN(last_addr, last_folio_sz) == ALIGN_DOWN(r->sampling_addr, last_folio_sz)) { - if (last_accessed) - r->nr_accesses++; + damon_update_region_access_rate(r, last_accessed); return; } last_accessed = damon_pa_young(r->sampling_addr, &last_folio_sz); - if (last_accessed) - r->nr_accesses++; + damon_update_region_access_rate(r, last_accessed); last_addr = r->sampling_addr; } diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index cf8a9fc5c9d1..53371bbec605 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -567,14 +567,12 @@ static void __damon_va_check_access(struct mm_struct *mm, /* If the region is in the last checked page, reuse the result */ if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) == ALIGN_DOWN(r->sampling_addr, last_folio_sz))) { - if (last_accessed) - r->nr_accesses++; + damon_update_region_access_rate(r, last_accessed); return; } last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz); - if (last_accessed) - r->nr_accesses++; + damon_update_region_access_rate(r, last_accessed); last_addr = r->sampling_addr; } -- cgit v1.2.3 From d2c062ade07ffd206dd16bf085f02abc59651309 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:46 +0000 Subject: mm/damon/core: implement a pseudo-moving sum function For values that continuously change, moving average or sum are good ways to provide fast updates while handling temporal and errorneous variability of the value. 
For example, the access rate counter (nr_accesses) is calculated as a sum of the number of positive sampled access check results that were collected during a discrete time window (aggregation interval), and hence it handles temporal and erroneous access check results, but provides the update only for every aggregation interval. Using a moving sum method for that could allow providing the value for every sampling interval. That could be useful for getting monitoring results snapshot or running DAMOS in fine-grained timing. However, supporting the moving sum for cases where the number of samples in the time window is arbitrary could impose high overhead, since the number of past values that it needs to keep could be too high. The nr_accesses would also be one of the cases. To mitigate the overhead, implement a pseudo-moving sum function that only provides an estimated pseudo-moving sum. It assumes there was no error in the last discrete time window and subtracts a constant portion of the last discrete time window sum. Note that the function is not strictly implementing the moving sum, but it keeps a property of moving sum, which makes the value the same as the discrete-window based sum for each time window-aligned timing. Hence, people collecting the value in the old timings would see no difference.
Link: https://lkml.kernel.org/r/20230915025251.72816-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ mm/damon/core.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 17c504d236b9..487a545a11b4 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -622,6 +622,8 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, + unsigned int len_window, unsigned int new_value); void damon_update_region_access_rate(struct damon_region *r, bool accessed); struct damos_filter *damos_new_filter(enum damos_filter_type type, diff --git a/mm/damon/core.c b/mm/damon/core.c index 10532159323a..b005dc15009f 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1549,6 +1549,46 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t, return damon_set_regions(t, &addr_range, 1); } +/* + * damon_moving_sum() - Calculate an inferred moving sum value. + * @mvsum: Inferred sum of the last @len_window values. + * @nomvsum: Non-moving sum of the last discrete @len_window window values. + * @len_window: The number of last values to take care of. + * @new_value: New value that will be added to the pseudo moving sum. + * + * Moving sum (moving average * window size) is good for handling noise, but + * the cost of keeping past values can be high for arbitrary window size. This + * function implements a lightweight pseudo moving sum function that doesn't + * keep the past window values. 
+ * + * It simply assumes there was no noise in the past, and get the no-noise + * assumed past value to drop from @nomvsum and @len_window. @nomvsum is a + * non-moving sum of the last window. For example, if @len_window is 10 and we + * have 25 values, @nomvsum is the sum of the 11th to 20th values of the 25 + * values. Hence, this function simply drops @nomvsum / @len_window from + * given @mvsum and add @new_value. + * + * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values for + * the last window could be vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20. For + * calculating next moving sum with a new value, we should drop 0 from 50 and + * add the new value. However, this function assumes it got value 5 for each + * of the last ten times. Based on the assumption, when the next value is + * measured, it drops the assumed past value, 5 from the current sum, and add + * the new value to get the updated pseduo-moving average. + * + * This means the value could have errors, but the errors will be disappeared + * for every @len_window aligned calls. For example, if @len_window is 10, the + * pseudo moving sum with 11th value to 19th value would have an error. But + * the sum with 20th value will not have the error. + * + * Return: Pseudo-moving average after getting the @new_value. + */ +unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, + unsigned int len_window, unsigned int new_value) +{ + return mvsum - nomvsum / len_window + new_value; +} + /** * damon_update_region_access_rate() - Update the access rate of a region. * @r: The DAMON region to update for its access check result. -- cgit v1.2.3 From 80333828ea7728ebe85d079bb5c1467eb9fc6c8c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:48 +0000 Subject: mm/damon/core: introduce nr_accesses_bp Add yet another representation of the access rate of each region, namely nr_accesses_bp. 
It is just same to the nr_accesses but represents the value in basis point (1 in 10,000), and updated at once in every aggregation interval. That is, moving_accesses_bp is just nr_accesses * 10000. This may seems useless at the moment. However, it will be useful for representing less than one nr_accesses value that will be needed to make moving sum-based nr_accesses. Link: https://lkml.kernel.org/r/20230915025251.72816-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 5 +++++ mm/damon/core-test.h | 5 +++++ mm/damon/core.c | 6 ++++++ 3 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 487a545a11b4..15f24b23c9a0 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -40,6 +40,7 @@ struct damon_addr_range { * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. + * @nr_accesses_bp: @nr_accesses in basis point (0.01%). * @list: List head for siblings. * @age: Age of this region. * @@ -49,6 +50,9 @@ struct damon_addr_range { * not be done with direct access but with the helper function, * damon_update_region_access_rate(). * + * @nr_accesses_bp is another representation of @nr_accesses in basis point + * (1 in 10,000) that updated every aggregation interval. + * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. 
If two * regions are merged into a new region, both @nr_accesses and @age of the new @@ -58,6 +62,7 @@ struct damon_region { struct damon_addr_range ar; unsigned long sampling_addr; unsigned int nr_accesses; + unsigned int nr_accesses_bp; struct list_head list; unsigned int age; diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h index c539f0e8377e..79f1f12e0dd5 100644 --- a/mm/damon/core-test.h +++ b/mm/damon/core-test.h @@ -94,6 +94,7 @@ static void damon_test_aggregate(struct kunit *test) for (ir = 0; ir < 3; ir++) { r = damon_new_region(saddr[it][ir], eaddr[it][ir]); r->nr_accesses = accesses[it][ir]; + r->nr_accesses_bp = accesses[it][ir] * 10000; damon_add_region(r, t); } it++; @@ -147,9 +148,11 @@ static void damon_test_merge_two(struct kunit *test) t = damon_new_target(); r = damon_new_region(0, 100); r->nr_accesses = 10; + r->nr_accesses_bp = 100000; damon_add_region(r, t); r2 = damon_new_region(100, 300); r2->nr_accesses = 20; + r2->nr_accesses_bp = 200000; damon_add_region(r2, t); damon_merge_two_regions(t, r, r2); @@ -196,6 +199,7 @@ static void damon_test_merge_regions_of(struct kunit *test) for (i = 0; i < ARRAY_SIZE(sa); i++) { r = damon_new_region(sa[i], ea[i]); r->nr_accesses = nrs[i]; + r->nr_accesses_bp = nrs[i] * 10000; damon_add_region(r, t); } @@ -297,6 +301,7 @@ static void damon_test_update_monitoring_result(struct kunit *test) struct damon_region *r = damon_new_region(3, 7); r->nr_accesses = 15; + r->nr_accesses_bp = 150000; r->age = 20; new_attrs = (struct damon_attrs){ diff --git a/mm/damon/core.c b/mm/damon/core.c index b005dc15009f..ce85c00b0a4c 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -128,6 +128,7 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end) region->ar.start = start; region->ar.end = end; region->nr_accesses = 0; + region->nr_accesses_bp = 0; INIT_LIST_HEAD(&region->list); region->age = 0; @@ -508,6 +509,7 @@ static void damon_update_monitoring_result(struct damon_region *r, {
r->nr_accesses = damon_nr_accesses_for_new_attrs(r->nr_accesses, old_attrs, new_attrs); + r->nr_accesses_bp = r->nr_accesses * 10000; r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs); } @@ -1115,6 +1117,7 @@ static void damon_merge_two_regions(struct damon_target *t, l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / (sz_l + sz_r); + l->nr_accesses_bp = l->nr_accesses * 10000; l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); l->ar.end = r->ar.end; damon_destroy_region(r, t); @@ -1138,6 +1141,8 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, else r->age++; + r->nr_accesses_bp = r->nr_accesses * 10000; + if (prev && prev->ar.end == r->ar.start && abs(prev->nr_accesses - r->nr_accesses) <= thres && damon_sz_region(prev) + damon_sz_region(r) <= sz_limit) @@ -1186,6 +1191,7 @@ static void damon_split_region_at(struct damon_target *t, new->age = r->age; new->last_nr_accesses = r->last_nr_accesses; + new->nr_accesses_bp = r->nr_accesses_bp; damon_insert_region(new, r, damon_next_region(r), t); } -- cgit v1.2.3 From ace30fb21af5f1be1605db72c16040b95b1557ef Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:49 +0000 Subject: mm/damon/core: use pseudo-moving sum for nr_accesses_bp Let nr_accesses_bp be calculated as a pseudo-moving sum that updated for every sampling interval, using damon_moving_sum(). This is assumed to be useful for cases that the aggregation interval is set quite huge, but the monitoring results need to be collected earlier than next aggregation interval is passed.
Link: https://lkml.kernel.org/r/20230915025251.72816-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 12 +++++++++--- mm/damon/core.c | 16 +++++++++++++++- mm/damon/paddr.c | 9 +++++---- mm/damon/vaddr.c | 12 +++++++----- 4 files changed, 36 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 15f24b23c9a0..0fe13482df63 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -40,7 +40,8 @@ struct damon_addr_range { * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. - * @nr_accesses_bp: @nr_accesses in basis point (0.01%). + * @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for + * each sampling interval. * @list: List head for siblings. * @age: Age of this region. * @@ -51,7 +52,11 @@ struct damon_addr_range { * damon_update_region_access_rate(). * * @nr_accesses_bp is another representation of @nr_accesses in basis point - * (1 in 10,000) that updated every aggregation interval. + * (1 in 10,000) that updated for every &damon_attrs->sample_interval in a + * manner similar to moving sum. By the algorithm, this value becomes + * @nr_accesses * 10000 for every &struct damon_attrs->aggr_interval. This can + * be used when the aggregation interval is too huge and therefore cannot wait + * for it before getting the access monitoring results. * * @age is initially zero, increased for each aggregation interval, and reset * to zero again if the access frequency is significantly changed. 
If two @@ -629,7 +634,8 @@ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, unsigned int len_window, unsigned int new_value); -void damon_update_region_access_rate(struct damon_region *r, bool accessed); +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs); struct damos_filter *damos_new_filter(enum damos_filter_type type, bool matching); diff --git a/mm/damon/core.c b/mm/damon/core.c index ce85c00b0a4c..29ee1fc18393 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1599,14 +1599,28 @@ unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, * damon_update_region_access_rate() - Update the access rate of a region. * @r: The DAMON region to update for its access check result. * @accessed: Whether the region has accessed during last sampling interval. + * @attrs: The damon_attrs of the DAMON context. * * Update the access rate of a region with the region's last sampling interval * access check result. * * Usually this will be called by &damon_operations->check_accesses callback. */ -void damon_update_region_access_rate(struct damon_region *r, bool accessed) +void damon_update_region_access_rate(struct damon_region *r, bool accessed, + struct damon_attrs *attrs) { + unsigned int len_window = 1; + + /* + * sample_interval can be zero, but cannot be larger than + * aggr_interval, owing to validation of damon_set_attrs(). + */ + if (attrs->sample_interval) + len_window = attrs->aggr_interval / attrs->sample_interval; + r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp, + r->last_nr_accesses * 10000, len_window, + accessed ? 
10000 : 0); + if (accessed) r->nr_accesses++; } diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 44f21860b555..081e2a325778 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -148,7 +148,8 @@ out: return accessed; } -static void __damon_pa_check_access(struct damon_region *r) +static void __damon_pa_check_access(struct damon_region *r, + struct damon_attrs *attrs) { static unsigned long last_addr; static unsigned long last_folio_sz = PAGE_SIZE; @@ -157,12 +158,12 @@ static void __damon_pa_check_access(struct damon_region *r) /* If the region is in the last checked page, reuse the result */ if (ALIGN_DOWN(last_addr, last_folio_sz) == ALIGN_DOWN(r->sampling_addr, last_folio_sz)) { - damon_update_region_access_rate(r, last_accessed); + damon_update_region_access_rate(r, last_accessed, attrs); return; } last_accessed = damon_pa_young(r->sampling_addr, &last_folio_sz); - damon_update_region_access_rate(r, last_accessed); + damon_update_region_access_rate(r, last_accessed, attrs); last_addr = r->sampling_addr; } @@ -175,7 +176,7 @@ static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx) damon_for_each_target(t, ctx) { damon_for_each_region(r, t) { - __damon_pa_check_access(r); + __damon_pa_check_access(r, &ctx->attrs); max_nr_accesses = max(r->nr_accesses, max_nr_accesses); } } diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 02ab448d9b1e..a4d1f63c5b23 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -558,26 +558,27 @@ static bool damon_va_young(struct mm_struct *mm, unsigned long addr, * r the region to be checked */ static void __damon_va_check_access(struct mm_struct *mm, - struct damon_region *r, bool same_target) + struct damon_region *r, bool same_target, + struct damon_attrs *attrs) { static unsigned long last_addr; static unsigned long last_folio_sz = PAGE_SIZE; static bool last_accessed; if (!mm) { - damon_update_region_access_rate(r, false); + damon_update_region_access_rate(r, false, attrs); return; } /* If the region is in 
the last checked page, reuse the result */ if (same_target && (ALIGN_DOWN(last_addr, last_folio_sz) == ALIGN_DOWN(r->sampling_addr, last_folio_sz))) { - damon_update_region_access_rate(r, last_accessed); + damon_update_region_access_rate(r, last_accessed, attrs); return; } last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz); - damon_update_region_access_rate(r, last_accessed); + damon_update_region_access_rate(r, last_accessed, attrs); last_addr = r->sampling_addr; } @@ -594,7 +595,8 @@ static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) mm = damon_get_mm(t); same_target = false; damon_for_each_region(r, t) { - __damon_va_check_access(mm, r, same_target); + __damon_va_check_access(mm, r, same_target, + &ctx->attrs); max_nr_accesses = max(r->nr_accesses, max_nr_accesses); same_target = true; } -- cgit v1.2.3 From 863803a7948c8e33e6a7b002017747ca83ecfd63 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 15 Sep 2023 02:52:51 +0000 Subject: mm/damon/core: mark damon_moving_sum() as a static function The function is used by only mm/damon/core.c. Mark it as a static function. 
Link: https://lkml.kernel.org/r/20230915025251.72816-9-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 -- mm/damon/core.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 0fe13482df63..491fdd3e4c76 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -632,8 +632,6 @@ void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); -unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, - unsigned int len_window, unsigned int new_value); void damon_update_region_access_rate(struct damon_region *r, bool accessed, struct damon_attrs *attrs); diff --git a/mm/damon/core.c b/mm/damon/core.c index 45cc108c0fe1..b15cf47d2d29 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1587,7 +1587,7 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t, * * Return: Pseudo-moving average after getting the @new_value. */ -unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, +static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum, unsigned int len_window, unsigned int new_value) { return mvsum - nomvsum / len_window + new_value; -- cgit v1.2.3 From 77e6c43e137c130138c3fbadc847351a83c4befe Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 13 Sep 2023 11:54:00 +0100 Subject: memblock: introduce MEMBLOCK_RSRV_NOINIT flag For reserved memory regions marked with this flag, reserve_bootmem_region is not called during memmap_init_reserved_pages. This can be used to avoid struct page initialization for regions which won't need them, for e.g. hugepages with Hugepage Vmemmap Optimization enabled. 
Link: https://lkml.kernel.org/r/20230913105401.519709-4-usama.arif@bytedance.com Signed-off-by: Usama Arif Acked-by: Muchun Song Reviewed-by: Mike Rapoport (IBM) Cc: Fam Zheng Cc: Mike Kravetz Cc: Punit Agrawal Signed-off-by: Andrew Morton --- include/linux/memblock.h | 9 +++++++++ mm/memblock.c | 33 ++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1c1072e3ca06..ae3bde302f70 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,6 +40,8 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. + * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are + * not initialized (only for reserved regions). */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ @@ -47,6 +49,7 @@ enum memblock_flags { MEMBLOCK_MIRROR = 0x2, /* mirrored region */ MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ + MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ }; /** @@ -125,6 +128,7 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); +int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); void memblock_free(void *ptr, size_t size); @@ -259,6 +263,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m) return m->flags & MEMBLOCK_NOMAP; } +static inline bool memblock_is_reserved_noinit(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_RSRV_NOINIT; +} + static inline bool 
memblock_is_driver_managed(struct memblock_region *m) { return m->flags & MEMBLOCK_DRIVER_MANAGED; diff --git a/mm/memblock.c b/mm/memblock.c index b978cda96cf0..fd492e5bbdbc 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -997,6 +997,24 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP); } +/** + * memblock_reserved_mark_noinit - Mark a reserved memory region with flag + * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized + * for this region. + * @base: the base phys addr of the region + * @size: the size of the region + * + * struct pages will not be initialized for reserved memory regions marked with + * %MEMBLOCK_RSRV_NOINIT. + * + * Return: 0 on success, -errno on failure. + */ +int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size) +{ + return memblock_setclr_flag(&memblock.reserved, base, size, 1, + MEMBLOCK_RSRV_NOINIT); +} + static bool should_skip_region(struct memblock_type *type, struct memblock_region *m, int nid, int flags) @@ -2113,13 +2131,18 @@ static void __init memmap_init_reserved_pages(void) memblock_set_node(start, end, &memblock.reserved, nid); } - /* initialize struct pages for the reserved regions */ + /* + * initialize struct pages for reserved regions that don't have + * the MEMBLOCK_RSRV_NOINIT flag set + */ for_each_reserved_mem_region(region) { - nid = memblock_get_region_node(region); - start = region->base; - end = start + region->size; + if (!memblock_is_reserved_noinit(region)) { + nid = memblock_get_region_node(region); + start = region->base; + end = start + region->size; - reserve_bootmem_region(start, end, nid); + reserve_bootmem_region(start, end, nid); + } } } -- cgit v1.2.3 From 42f994b71404b17abcd6b170de7a6aa95ffe5d4a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 16 Sep 2023 02:09:40 +0000 Subject: mm/damon/core: implement scheme-specific apply 
interval DAMON-based operation schemes are applied for every aggregation interval. That was mainly because schemes were using nr_accesses, which be complete to be used for every aggregation interval. However, the schemes are now using nr_accesses_bp, which is updated for each sampling interval in a way that reasonable to be used. Therefore, there is no reason to apply schemes for each aggregation interval. The unnecessary alignment with aggregation interval was also making some use cases of DAMOS tricky. Quotas setting under long aggregation interval is one such example. Suppose the aggregation interval is ten seconds, and there is a scheme having CPU quota 100ms per 1s. The scheme will actually use 100ms per ten seconds, since it cannot be applied before next aggregation interval. The feature is working as intended, but the results might not be that intuitive for some users. This could be fixed by updating the quota to 1s per 10s. But, in the case, the CPU usage of DAMOS could look like spikes, and would actually make a bad effect to other CPU-sensitive workloads. Implement a dedicated timing interval for each DAMON-based operation scheme, namely apply_interval. The interval will be sampling interval aligned, and each scheme will be applied for its apply_interval. The interval is set to 0 by default, and it means the scheme should use the aggregation interval instead. This avoids old users getting any behavioral difference.
Link: https://lkml.kernel.org/r/20230916020945.47296-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/damon.h | 17 ++++++++++-- mm/damon/core.c | 72 +++++++++++++++++++++++++++++++++++++++++++----- mm/damon/dbgfs.c | 3 +- mm/damon/lru_sort.c | 2 ++ mm/damon/reclaim.c | 2 ++ mm/damon/sysfs-schemes.c | 2 +- 6 files changed, 87 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 491fdd3e4c76..27b995c22497 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -314,16 +314,19 @@ struct damos_access_pattern { * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @pattern: Access pattern of target regions. * @action: &damo_action to be applied to the target regions. + * @apply_interval_us: The time between applying the @action. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. * @filters: Additional set of &struct damos_filter for &action. * @stat: Statistics of this scheme. * @list: List head for siblings. * - * For each aggregation interval, DAMON finds regions which fit in the + * For each @apply_interval_us, DAMON finds regions which fit in the * &pattern and applies &action to those. To avoid consuming too much * CPU time or IO resources for the &action, &quota is used. * + * If @apply_interval_us is zero, &damon_attrs->aggr_interval is used instead. + * * To do the work only when needed, schemes can be activated for specific * system situations using &wmarks.
If all schemes that registered to the * monitoring context are inactive, DAMON stops monitoring either, and just @@ -340,6 +343,14 @@ struct damos_access_pattern { struct damos { struct damos_access_pattern pattern; enum damos_action action; + unsigned long apply_interval_us; +/* private: internal use only */ + /* + * number of sample intervals that should be passed before applying + * @action + */ + unsigned long next_apply_sis; +/* public: */ struct damos_quota quota; struct damos_watermarks wmarks; struct list_head filters; @@ -641,7 +652,9 @@ void damos_add_filter(struct damos *s, struct damos_filter *f); void damos_destroy_filter(struct damos_filter *f); struct damos *damon_new_scheme(struct damos_access_pattern *pattern, - enum damos_action action, struct damos_quota *quota, + enum damos_action action, + unsigned long apply_interval_us, + struct damos_quota *quota, struct damos_watermarks *wmarks); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); diff --git a/mm/damon/core.c b/mm/damon/core.c index 79fef5145a4b..5eb649bd002f 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -313,7 +313,9 @@ static struct damos_quota *damos_quota_init_priv(struct damos_quota *quota) } struct damos *damon_new_scheme(struct damos_access_pattern *pattern, - enum damos_action action, struct damos_quota *quota, + enum damos_action action, + unsigned long apply_interval_us, + struct damos_quota *quota, struct damos_watermarks *wmarks) { struct damos *scheme; @@ -323,6 +325,13 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern, return NULL; scheme->pattern = *pattern; scheme->action = action; + scheme->apply_interval_us = apply_interval_us; + /* + * next_apply_sis will be set when kdamond starts. While kdamond is + * running, it will also updated when it is added to the DAMON context, + * or damon_attrs are updated. 
+ */ + scheme->next_apply_sis = 0; INIT_LIST_HEAD(&scheme->filters); scheme->stat = (struct damos_stat){}; INIT_LIST_HEAD(&scheme->list); @@ -335,9 +344,21 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern, return scheme; } +static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx) +{ + unsigned long sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + unsigned long apply_interval = s->apply_interval_us ? + s->apply_interval_us : ctx->attrs.aggr_interval; + + s->next_apply_sis = ctx->passed_sample_intervals + + apply_interval / sample_interval; +} + void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) { list_add_tail(&s->list, &ctx->schemes); + damos_set_next_apply_sis(s, ctx); } static void damon_del_scheme(struct damos *s) @@ -558,6 +579,7 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) { unsigned long sample_interval = attrs->sample_interval ? attrs->sample_interval : 1; + struct damos *s; if (attrs->min_nr_regions < 3) return -EINVAL; @@ -573,6 +595,10 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) damon_update_monitoring_results(ctx, attrs); ctx->attrs = *attrs; + + damon_for_each_scheme(s, ctx) + damos_set_next_apply_sis(s, ctx); + return 0; } @@ -1094,14 +1120,29 @@ static void kdamond_apply_schemes(struct damon_ctx *c) struct damon_target *t; struct damon_region *r, *next_r; struct damos *s; + unsigned long sample_interval = c->attrs.sample_interval ? + c->attrs.sample_interval : 1; + bool has_schemes_to_apply = false; damon_for_each_scheme(s, c) { + if (c->passed_sample_intervals != s->next_apply_sis) + continue; + + s->next_apply_sis += + (s->apply_interval_us ? 
s->apply_interval_us : + c->attrs.aggr_interval) / sample_interval; + if (!s->wmarks.activated) continue; + has_schemes_to_apply = true; + damos_adjust_quota(c, s); } + if (!has_schemes_to_apply) + return; + damon_for_each_target(t, c) { damon_for_each_region_safe(r, next_r, t) damon_do_apply_schemes(c, t, r); @@ -1372,11 +1413,19 @@ static void kdamond_init_intervals_sis(struct damon_ctx *ctx) { unsigned long sample_interval = ctx->attrs.sample_interval ? ctx->attrs.sample_interval : 1; + unsigned long apply_interval; + struct damos *scheme; ctx->passed_sample_intervals = 0; ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval; ctx->next_ops_update_sis = ctx->attrs.ops_update_interval / sample_interval; + + damon_for_each_scheme(scheme, ctx) { + apply_interval = scheme->apply_interval_us ? + scheme->apply_interval_us : ctx->attrs.aggr_interval; + scheme->next_apply_sis = apply_interval / sample_interval; + } } /* @@ -1428,19 +1477,28 @@ static int kdamond_fn(void *data) if (ctx->ops.check_accesses) max_nr_accesses = ctx->ops.check_accesses(ctx); - sample_interval = ctx->attrs.sample_interval ? - ctx->attrs.sample_interval : 1; if (ctx->passed_sample_intervals == next_aggregation_sis) { - ctx->next_aggregation_sis = next_aggregation_sis + - ctx->attrs.aggr_interval / sample_interval; kdamond_merge_regions(ctx, max_nr_accesses / 10, sz_limit); if (ctx->callback.after_aggregation && ctx->callback.after_aggregation(ctx)) break; - if (!list_empty(&ctx->schemes)) - kdamond_apply_schemes(ctx); + } + + /* + * do kdamond_apply_schemes() after kdamond_merge_regions() if + * possible, to reduce overhead + */ + if (!list_empty(&ctx->schemes)) + kdamond_apply_schemes(ctx); + + sample_interval = ctx->attrs.sample_interval ? 
+ ctx->attrs.sample_interval : 1; + if (ctx->passed_sample_intervals == next_aggregation_sis) { + ctx->next_aggregation_sis = next_aggregation_sis + + ctx->attrs.aggr_interval / sample_interval; + kdamond_reset_aggregated(ctx); kdamond_split_regions(ctx); if (ctx->ops.reset_aggregated) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 124f0f8c97b7..dc0ea1fc30ca 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -278,7 +278,8 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, goto fail; pos += parsed; - scheme = damon_new_scheme(&pattern, action, &quota, &wmarks); + scheme = damon_new_scheme(&pattern, action, 0, &quota, + &wmarks); if (!scheme) goto fail; diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 7b8fce2f67a8..3ecdcc029443 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -158,6 +158,8 @@ static struct damos *damon_lru_sort_new_scheme( pattern, /* (de)prioritize on LRU-lists */ action, + /* for each aggregation interval */ + 0, /* under the quota. */ &quota, /* (De)activate this according to the watermarks. */ diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 648d2a85523a..ab974e477d2f 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -142,6 +142,8 @@ static struct damos *damon_reclaim_new_scheme(void) &pattern, /* page out those, as soon as found */ DAMOS_PAGEOUT, + /* for each aggregation interval */ + 0, /* under the quota. */ &damon_reclaim_quota, /* (De)activate this according to the watermarks.
*/ diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 093700f50b18..3d30e85596b0 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1610,7 +1610,7 @@ static struct damos *damon_sysfs_mk_scheme( .low = sysfs_wmarks->low, }; - scheme = damon_new_scheme(&pattern, sysfs_scheme->action, &quota, + scheme = damon_new_scheme(&pattern, sysfs_scheme->action, 0, &quota, &wmarks); if (!scheme) return NULL; -- cgit v1.2.3 From a8306f2d4dcea03538c70c26d2948483f70254ff Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 31 Aug 2023 09:33:40 -0700 Subject: compiler.h: unify __UNIQUE_ID commit 6f33d58794ef ("__UNIQUE_ID()") added a fallback definition of __UNIQUE_ID because gcc 4.2 and older did not support __COUNTER__. Also, this commit is effectively a revert of commit b41c29b0527c ("Kbuild: provide a __UNIQUE_ID for clang") which mentions clang 2.6+ supporting __COUNTER__. Documentation/process/changes.rst currently lists the minimum supported version of these compilers as: - gcc: 5.1 - clang: 11.0.0 It should be safe to say that __COUNTER__ is well supported by this point.
Link: https://lkml.kernel.org/r/20230831-unique_id-v1-1-28bacd18eb1d@google.com Signed-off-by: Nick Desaulniers Cc: Arnd Bergmann Cc: Jan Beulich Cc: Luc Van Oostenryck Cc: Michal rarek Cc: Nathan Chancellor Cc: Paul Russel Cc: Tom Rix Signed-off-by: Andrew Morton --- include/linux/compiler-clang.h | 5 ----- include/linux/compiler-gcc.h | 2 -- include/linux/compiler.h | 5 +---- 3 files changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 9b673fefcef8..ddab1ef22bee 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -14,11 +14,6 @@ #undef __cleanup #define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) -/* same as gcc, this was present in clang-2.6 so we can assume it works - * with any version that can compile the kernel - */ -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - /* all clang versions usable with the kernel support KASAN ABI version 5 */ #define KASAN_ABI_VERSION 5 diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7af9e34ec261..2ceba3fe4ec1 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -39,8 +39,6 @@ #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7779a18b24f..174099fdc485 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -177,10 +177,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __asm__ ("" : "=r" (var) : "0" (var)) #endif -/* Not-quite-unique ID. 
*/ -#ifndef __UNIQUE_ID -# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) -#endif +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) /** * data_race - mark an expression as containing intentional data races -- cgit v1.2.3 From 33a9813825710fdc2b980d566ee391fd093a36c6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Aug 2023 16:31:42 +0200 Subject: introduce __next_thread(), fix next_tid() vs exec() race Patch series "introduce __next_thread(), change next_thread()". After commit dce8f8ed1de1 ("document while_each_thread(), change first_tid() to use for_each_thread()") + this series 1. We have only one lockless user of next_thread(), task_group_seq_get_next(). I think it should be changed too. 2. We have only one user of task_struct->thread_group, thread_group_empty(). The next patches will change thread_group_empty() and kill ->thread_group. This patch (of 2): next_tid(start) does: rcu_read_lock(); if (pid_alive(start)) { pos = next_thread(start); if (thread_group_leader(pos)) pos = NULL; else get_task_struct(pos); it should return pos = NULL when next_thread() wraps to the 1st thread in the thread group, group leader, and the thread_group_leader() check tries to detect this case. But this can race with exec. To simplify, suppose we have a main thread M and a single sub-thread T, next_tid(T) should return NULL. Now suppose that T execs. If next_tid(T) is called after T changes the leadership and before it does release_task() which removes the old leader from list, then next_thread() returns M and thread_group_leader(M) = F. Lockless use of next_thread() should be avoided. After this change only task_group_seq_get_next() does this, and I believe it should be changed as well. Link: https://lkml.kernel.org/r/20230824143112.GA31208@redhat.com Link: https://lkml.kernel.org/r/20230824143142.GA31222@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. 
Biederman Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- fs/proc/base.c | 6 ++---- include/linux/sched/signal.h | 11 +++++++++++ 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index ffd54617c354..c0e971cc6d41 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3840,10 +3840,8 @@ static struct task_struct *next_tid(struct task_struct *start) struct task_struct *pos = NULL; rcu_read_lock(); if (pid_alive(start)) { - pos = next_thread(start); - if (thread_group_leader(pos)) - pos = NULL; - else + pos = __next_thread(start); + if (pos) get_task_struct(pos); } rcu_read_unlock(); diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0014d3adaf84..7fb34b8cda54 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -715,6 +715,17 @@ bool same_thread_group(struct task_struct *p1, struct task_struct *p2) return p1->signal == p2->signal; } +/* + * returns NULL if p is the last thread in the thread group + */ +static inline struct task_struct *__next_thread(struct task_struct *p) +{ + return list_next_or_null_rcu(&p->signal->thread_head, + &p->thread_node, + struct task_struct, + thread_node); +} + static inline struct task_struct *next_thread(const struct task_struct *p) { return list_entry_rcu(p->thread_group.next, -- cgit v1.2.3 From d639cf4abb4d171ab2456904da5668c42b5c1937 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 24 Aug 2023 16:32:01 +0200 Subject: change next_thread() to use __next_thread() ?: group_leader This relies on fact that group leader is always the 1st entry in the signal->thread_head list. With or without this change, if the lockless next_thread(last_thread) races with exec it can return the old or the new leader. We are almost ready to kill task->thread_group, after this change its only user is thread_group_empty(). 
Link: https://lkml.kernel.org/r/20230824143201.GB31222@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/sched/signal.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7fb34b8cda54..cffc882d367f 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -726,10 +726,9 @@ static inline struct task_struct *__next_thread(struct task_struct *p) thread_node); } -static inline struct task_struct *next_thread(const struct task_struct *p) +static inline struct task_struct *next_thread(struct task_struct *p) { - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); + return __next_thread(p) ?: p->group_leader; } static inline int thread_group_empty(struct task_struct *p) -- cgit v1.2.3 From e34a35ee1f52312af130b5ebd42fa28313fc6149 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 26 Aug 2023 13:14:06 +0200 Subject: change thread_group_empty() to use task_struct->thread_node Patch series "kill task_struct->thread_group". This patch (of 2): It could use list_is_singular() but this way it is cheaper. Plus the thread_group_leader() check makes it clear that thread_group_empty() can only return true if p is a group leader. This was not immediately obvious before this patch. task_struct->thread_group no longer has users, it can die. Link: https://lkml.kernel.org/r/20230826111200.GA22982@redhat.com Link: https://lkml.kernel.org/r/20230826111406.GA23238@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. 
Biederman Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/sched/signal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index cffc882d367f..d7fa3ca2fa53 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -733,7 +733,8 @@ static inline struct task_struct *next_thread(struct task_struct *p) static inline int thread_group_empty(struct task_struct *p) { - return list_empty(&p->thread_group); + return thread_group_leader(p) && + list_is_last(&p->thread_node, &p->signal->thread_head); } #define delay_group_leader(p) \ -- cgit v1.2.3 From 8e1f385104ac044f1552686ad6e1cbc71cc05a30 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 26 Aug 2023 13:14:09 +0200 Subject: kill task_struct->thread_group The last user was removed by the previous patch. Link: https://lkml.kernel.org/r/20230826111409.GA23243@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/sched.h | 1 - init/init_task.c | 1 - kernel/exit.c | 1 - kernel/fork.c | 3 --- 4 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..6d1341b1673f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1002,7 +1002,6 @@ struct task_struct { /* PID/PID hash table linkage. 
*/ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; - struct list_head thread_group; struct list_head thread_node; struct completion *vfork_done; diff --git a/init/init_task.c b/init/init_task.c index ff6c4b9bfe6b..c0de0200fd56 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -132,7 +132,6 @@ struct task_struct init_task .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), .timer_slack_ns = 50000, /* 50 usec default slack */ .thread_pid = &init_struct_pid, - .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), #ifdef CONFIG_AUDIT .loginuid = INVALID_UID, diff --git a/kernel/exit.c b/kernel/exit.c index edb50b4c9972..f3ba4b97a7d9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -133,7 +133,6 @@ static void __unhash_process(struct task_struct *p, bool group_dead) list_del_init(&p->sibling); __this_cpu_dec(process_counts); } - list_del_rcu(&p->thread_group); list_del_rcu(&p->thread_node); } diff --git a/kernel/fork.c b/kernel/fork.c index 3b6d20dfb9a8..b9d3aa493bbd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2576,7 +2576,6 @@ __latent_entropy struct task_struct *copy_process( p->dirty_paused_when = 0; p->pdeath_signal = 0; - INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; clear_posix_cputimers_work(p); @@ -2704,8 +2703,6 @@ __latent_entropy struct task_struct *copy_process( atomic_inc(&current->signal->live); refcount_inc(&current->signal->sigcnt); task_join_group_stop(p); - list_add_tail_rcu(&p->thread_group, - &p->group_leader->thread_group); list_add_tail_rcu(&p->thread_node, &p->signal->thread_head); } -- cgit v1.2.3 From 9cba82bba500e3ce875381350f289cfb3aa633ba Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Tue, 5 Sep 2023 02:48:33 +0000 Subject: seq_file: add helper macro to define attribute for rw file Patch series "Add helper macro DEFINE_SHOW_STORE_ATTRIBUTE() at seq_file.c", v6.
We already own DEFINE_SHOW_ATTRIBUTE() helper macro for defining attribute for read-only file, but we found many of drivers also want a helper macro for read-write file too. So we add this helper macro to reduce duplicated code. This patch (of 3): We already own DEFINE_SHOW_ATTRIBUTE() helper macro for defining attribute for read-only file, but many of drivers want a helper macro for read-write file too. So we add DEFINE_SHOW_STORE_ATTRIBUTE() helper to reduce duplicated code. Link: https://lkml.kernel.org/r/20230905024835.43219-1-yangxingui@huawei.com Link: https://lkml.kernel.org/r/20230905024835.43219-2-yangxingui@huawei.com Signed-off-by: Luo Jiaxing Co-developed-by: Xingui Yang Signed-off-by: Xingui Yang Reviewed-by: Andy Shevchenko Cc: Al Viro Cc: Animesh Manna Cc: Anshuman Gupta Cc: Damien Le Moal Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Himanshu Madhani Cc: James Bottomley Cc: John Garry Cc: Martin K. Petersen Cc: Uma Shankar Cc: Xiang Chen Cc: Zeng Tao Signed-off-by: Andrew Morton --- include/linux/seq_file.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 386ab580b839..234bcdb1fba4 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -207,6 +207,21 @@ static const struct file_operations __name ## _fops = { \ .release = single_release, \ } +#define DEFINE_SHOW_STORE_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .write = __name ## _write, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ -- cgit v1.2.3 From 6309727ef27162deabd5c095c11af24970fba5a2 
Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 8 Sep 2023 01:40:48 +0200 Subject: kthread: add kthread_stop_put Add a kthread_stop_put() helper that stops a thread and puts its task struct. Use it to replace the various instances of kthread_stop() followed by put_task_struct(). Remove the kthread_stop_put() macro in usbip that is similar but doesn't return the result of kthread_stop(). [agruenba@redhat.com: fix kerneldoc comment] Link: https://lkml.kernel.org/r/20230911111730.2565537-1-agruenba@redhat.com [akpm@linux-foundation.org: document kthread_stop_put()'s argument] Link: https://lkml.kernel.org/r/20230907234048.2499820-1-agruenba@redhat.com Signed-off-by: Andreas Gruenbacher Signed-off-by: Andrew Morton --- drivers/accel/ivpu/ivpu_job.c | 3 +-- drivers/dma-buf/st-dma-fence-chain.c | 12 ++++-------- drivers/dma-buf/st-dma-fence.c | 4 +--- drivers/gpu/drm/i915/gt/selftest_migrate.c | 4 +--- drivers/net/xen-netback/interface.c | 3 +-- drivers/usb/usbip/usbip_common.h | 6 ------ fs/gfs2/ops_fstype.c | 9 +++------ include/linux/kthread.h | 1 + kernel/irq/manage.c | 15 +++++---------- kernel/kthread.c | 18 ++++++++++++++++++ kernel/smpboot.c | 3 +-- mm/damon/core.c | 3 +-- net/core/pktgen.c | 3 +-- 13 files changed, 38 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index de9e69f70af7..76f468c9f761 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -618,6 +618,5 @@ int ivpu_job_done_thread_init(struct ivpu_device *vdev) void ivpu_job_done_thread_fini(struct ivpu_device *vdev) { - kthread_stop(vdev->job_done_thread); - put_task_struct(vdev->job_done_thread); + kthread_stop_put(vdev->job_done_thread); } diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index c0979c8049b5..9c2a0c082a76 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -476,10 +476,9 @@ 
static int find_race(void *arg) for (i = 0; i < ncpus; i++) { int ret; - ret = kthread_stop(threads[i]); + ret = kthread_stop_put(threads[i]); if (ret && !err) err = ret; - put_task_struct(threads[i]); } kfree(threads); @@ -591,8 +590,7 @@ static int wait_forward(void *arg) for (i = 0; i < fc.chain_length; i++) dma_fence_signal(fc.fences[i]); - err = kthread_stop(tsk); - put_task_struct(tsk); + err = kthread_stop_put(tsk); err: fence_chains_fini(&fc); @@ -621,8 +619,7 @@ static int wait_backward(void *arg) for (i = fc.chain_length; i--; ) dma_fence_signal(fc.fences[i]); - err = kthread_stop(tsk); - put_task_struct(tsk); + err = kthread_stop_put(tsk); err: fence_chains_fini(&fc); @@ -669,8 +666,7 @@ static int wait_random(void *arg) for (i = 0; i < fc.chain_length; i++) dma_fence_signal(fc.fences[i]); - err = kthread_stop(tsk); - put_task_struct(tsk); + err = kthread_stop_put(tsk); err: fence_chains_fini(&fc); diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c index fb6e0a6ae2c9..b7c6f7ea9e0c 100644 --- a/drivers/dma-buf/st-dma-fence.c +++ b/drivers/dma-buf/st-dma-fence.c @@ -548,11 +548,9 @@ static int race_signal_callback(void *arg) for (i = 0; i < ARRAY_SIZE(t); i++) { int err; - err = kthread_stop(t[i].task); + err = kthread_stop_put(t[i].task); if (err && !ret) ret = err; - - put_task_struct(t[i].task); } } diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 3def5ca72dec..0fb07f073baa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -719,11 +719,9 @@ static int threaded_migrate(struct intel_migrate *migrate, if (IS_ERR_OR_NULL(tsk)) continue; - status = kthread_stop(tsk); + status = kthread_stop_put(tsk); if (status && !err) err = status; - - put_task_struct(tsk); } kfree(thread); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index f3f2c07423a6..33c8143619f0 100644 --- 
a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -672,8 +672,7 @@ err: static void xenvif_disconnect_queue(struct xenvif_queue *queue) { if (queue->task) { - kthread_stop(queue->task); - put_task_struct(queue->task); + kthread_stop_put(queue->task); queue->task = NULL; } diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index d8cbd2dfc2c2..282efca64a01 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -298,12 +298,6 @@ struct usbip_device { __k; \ }) -#define kthread_stop_put(k) \ - do { \ - kthread_stop(k); \ - put_task_struct(k); \ - } while (0) - /* usbip_common.c */ void usbip_dump_urb(struct urb *purb); void usbip_dump_header(struct usbip_header *pdu); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 33ca04733e93..ecf789b7168c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1126,8 +1126,7 @@ static int init_threads(struct gfs2_sbd *sdp) return 0; fail: - kthread_stop(sdp->sd_logd_process); - put_task_struct(sdp->sd_logd_process); + kthread_stop_put(sdp->sd_logd_process); sdp->sd_logd_process = NULL; return error; } @@ -1135,13 +1134,11 @@ fail: void gfs2_destroy_threads(struct gfs2_sbd *sdp) { if (sdp->sd_logd_process) { - kthread_stop(sdp->sd_logd_process); - put_task_struct(sdp->sd_logd_process); + kthread_stop_put(sdp->sd_logd_process); sdp->sd_logd_process = NULL; } if (sdp->sd_quotad_process) { - kthread_stop(sdp->sd_quotad_process); - put_task_struct(sdp->sd_quotad_process); + kthread_stop_put(sdp->sd_quotad_process); sdp->sd_quotad_process = NULL; } } diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2c30ade43bc8..b11f53c1ba2e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -86,6 +86,7 @@ void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int 
kthread_stop(struct task_struct *k); +int kthread_stop_put(struct task_struct *k); bool kthread_should_stop(void); bool kthread_should_park(void); bool kthread_should_stop_or_park(void); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d309ba84e08a..1782f90cd8c6 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1852,15 +1852,13 @@ out_thread: struct task_struct *t = new->thread; new->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } if (new->secondary && new->secondary->thread) { struct task_struct *t = new->secondary->thread; new->secondary->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } out_mput: module_put(desc->owner); @@ -1971,12 +1969,9 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * the same bit to a newly requested action. */ if (action->thread) { - kthread_stop(action->thread); - put_task_struct(action->thread); - if (action->secondary && action->secondary->thread) { - kthread_stop(action->secondary->thread); - put_task_struct(action->secondary->thread); - } + kthread_stop_put(action->thread); + if (action->secondary && action->secondary->thread) + kthread_stop_put(action->secondary->thread); } /* Last action releases resources */ diff --git a/kernel/kthread.c b/kernel/kthread.c index 1eea53050bab..290cbc845225 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -715,6 +715,24 @@ int kthread_stop(struct task_struct *k) } EXPORT_SYMBOL(kthread_stop); +/** + * kthread_stop_put - stop a thread and put its task struct + * @k: thread created by kthread_create(). + * + * Stops a thread created by kthread_create() and put its task_struct. + * Only use when holding an extra task struct reference obtained by + * calling get_task_struct(). 
+ */ +int kthread_stop_put(struct task_struct *k) +{ + int ret; + + ret = kthread_stop(k); + put_task_struct(k); + return ret; +} +EXPORT_SYMBOL(kthread_stop_put); + int kthreadd(void *unused) { struct task_struct *tsk = current; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index f47d8f375946..1992b62e980b 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -272,8 +272,7 @@ static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); if (tsk) { - kthread_stop(tsk); - put_task_struct(tsk); + kthread_stop_put(tsk); *per_cpu_ptr(ht->store, cpu) = NULL; } } diff --git a/mm/damon/core.c b/mm/damon/core.c index bcd2bd9d6c10..2f54f153d7f5 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -699,8 +699,7 @@ static int __damon_stop(struct damon_ctx *ctx) if (tsk) { get_task_struct(tsk); mutex_unlock(&ctx->kdamond_lock); - kthread_stop(tsk); - put_task_struct(tsk); + kthread_stop_put(tsk); return 0; } mutex_unlock(&ctx->kdamond_lock); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f56b8d697014..826250a0f5b1 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3982,8 +3982,7 @@ static void __net_exit pg_net_exit(struct net *net) list_for_each_safe(q, n, &list) { t = list_entry(q, struct pktgen_thread, th_list); list_del(&t->th_list); - kthread_stop(t->tsk); - put_task_struct(t->tsk); + kthread_stop_put(t->tsk); kfree(t); } -- cgit v1.2.3 From 5e57418a2031cd5e1863efdf3d7447a16a368172 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 11 Sep 2023 18:49:13 +0300 Subject: minmax: deduplicate __unconst_integer_typeof() It appears that compiler_types.h already have an implementation of the __unconst_integer_typeof() called __unqual_scalar_typeof(). Use it instead of the copy. 
Link: https://lkml.kernel.org/r/20230911154913.4176033-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Herve Codina Signed-off-by: Andrew Morton --- include/linux/minmax.h | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 83aebc244cba..69bbe987fa87 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,6 +2,7 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H +#include <linux/compiler_types.h> #include <linux/const.h> #include <linux/types.h> @@ -134,27 +135,6 @@ */ #define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) -/* - * Remove a const qualifier from integer types - * _Generic(foo, type-name: association, ..., default: association) performs a - * comparison against the foo type (not the qualified type). - * Do not use the const keyword in the type-name as it will not match the - * unqualified type of foo. - */ -#define __unconst_integer_type_cases(type) \ - unsigned type: (unsigned type)0, \ - signed type: (signed type)0 - -#define __unconst_integer_typeof(x) typeof( \ - _Generic((x), \ - char: (char)0, \ - __unconst_integer_type_cases(char), \ - __unconst_integer_type_cases(short), \ - __unconst_integer_type_cases(int), \ - __unconst_integer_type_cases(long), \ - __unconst_integer_type_cases(long long), \ - default: (x))) - /* * Do not check the array parameter using __must_be_array(). * In the following legit use-case where the "array" passed is a simple pointer, @@ -169,13 +149,13 @@ * 'int *buff' and 'int buff[N]' types. * * The array can be an array of const items. - * typeof() keeps the const qualifier. Use __unconst_integer_typeof() in order + * typeof() keeps the const qualifier. Use __unqual_scalar_typeof() in order * to discard the const qualifier for the __element variable.
*/ #define __minmax_array(op, array, len) ({ \ typeof(&(array)[0]) __array = (array); \ typeof(len) __len = (len); \ - __unconst_integer_typeof(__array[0]) __element = __array[--__len]; \ + __unqual_scalar_typeof(__array[0]) __element = __array[--__len];\ while (__len--) \ __element = op(__element, __array[__len]); \ __element; }) -- cgit v1.2.3 From f6e9d38f8eb00ac8b52e6d15f6aa9bcecacb081b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Sep 2023 12:23:55 +0300 Subject: minmax: fix header inclusions BUILD_BUG_ON*() macros are defined in build_bug.h. Include it. Replace compiler_types.h by compiler.h, which provides the former, to have a definition of the __UNIQUE_ID(). Link: https://lkml.kernel.org/r/20230912092355.79280-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Herve Codina Cc: Rasmus Villemoes Signed-off-by: Andrew Morton --- include/linux/minmax.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 69bbe987fa87..ca69abd6151e 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -2,7 +2,8 @@ #ifndef _LINUX_MINMAX_H #define _LINUX_MINMAX_H -#include <linux/compiler_types.h> +#include <linux/build_bug.h> +#include <linux/compiler.h> #include <linux/const.h> #include <linux/types.h> -- cgit v1.2.3 From a9e1a3d84e4a0ea560ed4d84c28d06dbfdffed22 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:35 +0800 Subject: crash_core: change the prototype of function parse_crashkernel() Add two parameters 'low_size' and 'high' to function parse_crashkernel(), later crashkernel=,high|low parsing will be added. Make adjustments in all call sites of parse_crashkernel() in arch.
Link: https://lkml.kernel.org/r/20230914033142.676708-3-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- arch/arm/kernel/setup.c | 3 ++- arch/arm64/mm/init.c | 2 +- arch/ia64/kernel/setup.c | 2 +- arch/loongarch/kernel/setup.c | 4 +++- arch/mips/kernel/setup.c | 3 ++- arch/powerpc/kernel/fadump.c | 2 +- arch/powerpc/kexec/core.c | 2 +- arch/powerpc/mm/nohash/kaslr_booke.c | 2 +- arch/riscv/mm/init.c | 2 +- arch/s390/kernel/setup.c | 4 ++-- arch/sh/kernel/machine_kexec.c | 2 +- arch/x86/kernel/setup.c | 3 ++- include/linux/crash_core.h | 3 ++- kernel/crash_core.c | 15 ++++++++++++--- 14 files changed, 32 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c66b560562b3..e2bb7afd0683 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1010,7 +1010,8 @@ static void __init reserve_crashkernel(void) total_mem = get_total_mem(); ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base); + &crash_size, &crash_base, + NULL, NULL); /* invalid value specified or crashkernel=0 */ if (ret || !crash_size) return; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8a0f8604348b..801c59c39a8f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -142,7 +142,7 @@ static void __init reserve_crashkernel(void) /* crashkernel=X[@offset] */ ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), - &crash_size, &crash_base); + &crash_size, &crash_base, NULL, NULL); if (ret == -ENOENT) { ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); if (ret || !crash_size) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 5a55ac82c13a..4faea2d2cf07 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -277,7 +277,7 @@ static void __init setup_crashkernel(unsigned long total, int *n) int ret; ret = 
parse_crashkernel(boot_command_line, total, - &size, &base); + &size, &base, NULL, NULL); if (ret == 0 && size > 0) { if (!base) { sort_regions(rsvd_region, *n); diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 7783f0a3d742..4de32b07c0dc 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -267,7 +267,9 @@ static void __init arch_parse_crashkernel(void) unsigned long long crash_base, crash_size; total_mem = memblock_phys_mem_size(); - ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base, + NULL, NULL); if (ret < 0 || crash_size <= 0) return; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index cb871eb784a7..08321c945ac4 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -460,7 +460,8 @@ static void __init mips_parse_crashkernel(void) total_mem = memblock_phys_mem_size(); ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base); + &crash_size, &crash_base, + NULL, NULL); if (ret != 0 || crash_size <= 0) return; diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 3ff2da7b120b..d14eda1e8589 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -313,7 +313,7 @@ static __init u64 fadump_calculate_reserve_size(void) * memory at a predefined offset. */ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &size, &base); + &size, &base, NULL, NULL); if (ret == 0 && size > 0) { unsigned long max_size; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index de64c7962991..9346c960b296 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -109,7 +109,7 @@ void __init reserve_crashkernel(void) total_mem_sz = memory_limit ? 
memory_limit : memblock_phys_mem_size(); /* use common parsing */ ret = parse_crashkernel(boot_command_line, total_mem_sz, - &crash_size, &crash_base); + &crash_size, &crash_base, NULL, NULL); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 2fb3edafe9ab..b4f2786a7d2b 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -178,7 +178,7 @@ static void __init get_crash_kernel(void *fdt, unsigned long size) int ret; ret = parse_crashkernel(boot_command_line, size, &crash_size, - &crash_base); + &crash_base, NULL, NULL); if (ret != 0 || crash_size == 0) return; if (crash_base == 0) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0798bd861dcb..9fe448900059 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1388,7 +1388,7 @@ static void __init reserve_crashkernel(void) } ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), - &crash_size, &crash_base); + &crash_size, &crash_base, NULL, NULL); if (ret == -ENOENT) { /* Fallback to crashkernel=X,[high,low] */ ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index de6ad0fb2328..e555b576d3c8 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -625,8 +625,8 @@ static void __init reserve_crashkernel(void) phys_addr_t low, high; int rc; - rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size, - &crash_base); + rc = parse_crashkernel(boot_command_line, ident_map_size, + &crash_size, &crash_base, NULL, NULL); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c index 223c14f44af7..fa3a7b36190a 100644 --- a/arch/sh/kernel/machine_kexec.c +++ 
b/arch/sh/kernel/machine_kexec.c @@ -154,7 +154,7 @@ void __init reserve_crashkernel(void) int ret; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &crash_size, &crash_base); + &crash_size, &crash_base, NULL, NULL); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b098b1fa2470..655c04812905 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -553,7 +553,8 @@ static void __init reserve_crashkernel(void) total_mem = memblock_phys_mem_size(); /* crashkernel=XM */ - ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base, NULL, NULL); if (ret != 0 || crash_size <= 0) { /* crashkernel=X,high */ ret = parse_crashkernel_high(boot_command_line, total_mem, diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 0c06561bf5ff..6156355ef831 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -80,7 +80,8 @@ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void final_note(Elf_Word *buf); int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); + unsigned long long *crash_size, unsigned long long *crash_base, + unsigned long long *low_size, bool *high); int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 507113932aa9..33ced5b5ed4e 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -287,10 +287,19 @@ static int __init __parse_crashkernel(char *cmdline, int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, 
unsigned long long *crash_size, - unsigned long long *crash_base) + unsigned long long *crash_base, + unsigned long long *low_size, + bool *high) { - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - NULL); + int ret; + + /* crashkernel=X[@offset] */ + ret = __parse_crashkernel(cmdline, system_ram, crash_size, + crash_base, NULL); + if (!high) + return ret; + + return 0; } int __init parse_crashkernel_high(char *cmdline, -- cgit v1.2.3 From 70916e9c8d9f1a286c99727072b22e395097909f Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:36 +0800 Subject: crash_core: change parse_crashkernel() to support crashkernel=,high|low parsing Now parse_crashkernel() is a real entry point for all kinds of crashkernel parsing on any architecture. And wrap the crashkernel=,high|low handling inside CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION ifdeffery scope. Link: https://lkml.kernel.org/r/20230914033142.676708-4-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 6 ++++++ kernel/crash_core.c | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 6156355ef831..d8050a7eab01 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -79,6 +79,12 @@ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#endif + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, unsigned long long *low_size, bool *high); diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 33ced5b5ed4e..99a243540a35 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -283,6 +283,9 @@ static int __init __parse_crashkernel(char *cmdline, /* * That function is the entry point for command line parsing and should be * called from the arch-specific code. + * + * If crashkernel=,high|low is supported on architecture, non-NULL values + * should be passed to parameters 'low_size' and 'high'. */ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, @@ -296,10 +299,37 @@ int __init parse_crashkernel(char *cmdline, /* crashkernel=X[@offset] */ ret = __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, NULL); - if (!high) - return ret; +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION + /* + * If non-NULL 'high' passed in and no normal crashkernel + * setting detected, try parsing crashkernel=,high|low. + */ + if (high && ret == -ENOENT) { + ret = __parse_crashkernel(cmdline, 0, crash_size, + crash_base, suffix_tbl[SUFFIX_HIGH]); + if (ret || !*crash_size) + return -EINVAL; - return 0; + /* + * crashkernel=Y,low can be specified or not, but invalid value + * is not allowed. + */ + ret = __parse_crashkernel(cmdline, 0, low_size, + crash_base, suffix_tbl[SUFFIX_LOW]); + if (ret == -ENOENT) { + *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; + ret = 0; + } else if (ret) { + return ret; + } + + *high = true; + } +#endif + if (!*crash_size) + ret = -EINVAL; + + return ret; } int __init parse_crashkernel_high(char *cmdline, -- cgit v1.2.3 From 0ab97169aa0517079b22c2e64192906caa5dc6d5 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:37 +0800 Subject: crash_core: add generic function to do reservation In architecture like x86_64, arm64 and riscv, they have vast virtual address space and usually have huge physical memory RAM. Their crashkernel reservation doesn't have to be limited under 4G RAM, but can be extended to the whole physical memory via crashkernel=,high support. 
Now add function reserve_crashkernel_generic() to reserve crashkernel memory if users specify any case of kernel parameters, like crashkernel=xM[@offset] or crashkernel=,high|low. This is preparation to simplify code of crashkernel=,high support in architectures. Link: https://lkml.kernel.org/r/20230914033142.676708-5-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 28 ++++++++++++ kernel/crash_core.c | 107 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index d8050a7eab01..4dbd6565e0ff 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -93,6 +93,34 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#ifndef CRASH_ALIGN +#define CRASH_ALIGN SZ_2M +#endif +#ifndef CRASH_ADDR_LOW_MAX +#define CRASH_ADDR_LOW_MAX SZ_4G +#endif +#ifndef CRASH_ADDR_HIGH_MAX +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() +#endif + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); +#else +static inline void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{} +#endif + /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 99a243540a35..72e358197b52 100644 ---
a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -5,7 +5,6 @@ */ #include -#include #include #include #include @@ -13,6 +12,9 @@ #include #include #include +#include +#include +#include #include #include @@ -360,6 +362,109 @@ static int __init parse_crashkernel_dummy(char *arg) } early_param("crashkernel", parse_crashkernel_dummy); +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +static int __init reserve_crashkernel_low(unsigned long long low_size) +{ +#ifdef CONFIG_64BIT + unsigned long long low_base; + + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); + if (!low_base) { + pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); + return -ENOMEM; + } + + pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", + low_base, low_base + low_size, low_size >> 20); + + crashk_low_res.start = low_base; + crashk_low_res.end = low_base + low_size - 1; + insert_resource(&iomem_resource, &crashk_low_res); +#endif + return 0; +} + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{ + unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0; + bool fixed_base = false; + + /* User specifies base address explicitly. */ + if (crash_base) { + fixed_base = true; + search_base = crash_base; + search_end = crash_base + crash_size; + } else if (high) { + search_base = CRASH_ADDR_LOW_MAX; + search_end = CRASH_ADDR_HIGH_MAX; + } + +retry: + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, + search_base, search_end); + if (!crash_base) { + /* + * For crashkernel=size[KMG]@offset[KMG], print out failure + * message if can't reserve the specified region. 
+ */ + if (fixed_base) { + pr_warn("crashkernel reservation failed - memory is in use.\n"); + return; + } + + /* + * For crashkernel=size[KMG], if the first attempt was for + * low memory, fall back to high memory, the minimum required + * low memory will be reserved later. + */ + if (!high && search_end == CRASH_ADDR_LOW_MAX) { + search_end = CRASH_ADDR_HIGH_MAX; + search_base = CRASH_ADDR_LOW_MAX; + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; + goto retry; + } + + /* + * For crashkernel=size[KMG],high, if the first attempt was + * for high memory, fall back to low memory. + */ + if (high && search_end == CRASH_ADDR_HIGH_MAX) { + search_end = CRASH_ADDR_LOW_MAX; + search_base = 0; + goto retry; + } + pr_warn("cannot allocate crashkernel (size:0x%llx)\n", + crash_size); + return; + } + + if ((crash_base > CRASH_ADDR_LOW_MAX) && + crash_low_size && reserve_crashkernel_low(crash_low_size)) { + memblock_phys_free(crash_base, crash_size); + return; + } + + pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", + crash_base, crash_base + crash_size, crash_size >> 20); + + /* + * The crashkernel memory will be removed from the kernel linear + * map. Inform kmemleak so that it won't try to access it. + */ + kmemleak_ignore_phys(crash_base); + if (crashk_low_res.end) + kmemleak_ignore_phys(crashk_low_res.start); + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); +} +#endif + int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { -- cgit v1.2.3 From b631b95dded5e7f007a3a79cbaf82ef50c1e2cf7 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:38 +0800 Subject: crash_core: move crashk_*res definition into crash_core.c Both crashk_res and crashk_low_res are used to mark the reserved crashkernel regions in iomem_resource tree. And later the generic crashkernel reservation will be added into crash_core.c.
So move crashk_res and crashk_low_res definition into crash_core.c to avoid compiling error if CONFIG_CRASH_CORE=on while CONFIG_KEXEC_CORE is unset. Meanwhile include asm/crash_core.h in linux/crash_core.h if generic reservation is needed. In that case, asm/crash_core.h needs to be added by ARCH. In asm/crash_core.h, ARCH can provide its own macro definitions to override macros in linux/crash_core.h if needed. Wrap the including into CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION ifdeffery scope to avoid compiling error in other ARCH-es which don't take the generic reservation way yet. Link: https://lkml.kernel.org/r/20230914033142.676708-6-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 8 ++++++++ include/linux/kexec.h | 4 ---- kernel/crash_core.c | 16 ++++++++++++++++ kernel/kexec_core.c | 17 ----------------- 4 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 4dbd6565e0ff..3c735a7e33fb 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -5,6 +5,14 @@ #include #include #include +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#include +#endif + +/* Location of a reserved region to hold the crash kernel. + */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; #define CRASH_CORE_NOTE_NAME "CORE" #define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 32c78078552c..8227455192b7 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -22,10 +22,6 @@ #include #include -/* Location of a reserved region to hold the crash kernel.
- */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; extern note_buf_t __percpu *crash_notes; #ifdef CONFIG_KEXEC_CORE diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 72e358197b52..fa8808c4f00e 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -35,6 +35,22 @@ u32 *vmcoreinfo_note; /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ static unsigned char *vmcoreinfo_data_safecopy; +/* Location of the reserved area for the crash kernel */ +struct resource crashk_res = { + .name = "Crash kernel", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, + .desc = IORES_DESC_CRASH_KERNEL +}; +struct resource crashk_low_res = { + .name = "Crash kernel", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, + .desc = IORES_DESC_CRASH_KERNEL +}; + /* * parsing the "crashkernel" commandline * diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 9dc728982d79..be5642a4ec49 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -52,23 +52,6 @@ atomic_t __kexec_lock = ATOMIC_INIT(0); /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; - -/* Location of the reserved area for the crash kernel */ -struct resource crashk_res = { - .name = "Crash kernel", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, - .desc = IORES_DESC_CRASH_KERNEL -}; -struct resource crashk_low_res = { - .name = "Crash kernel", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, - .desc = IORES_DESC_CRASH_KERNEL -}; - int kexec_should_crash(struct task_struct *p) { /* -- cgit v1.2.3 From c37e56cac3d62c69f093904afbc58fc428484d14 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 14 Sep 2023 11:31:42 +0800 Subject: crash_core.c: remove unneeded functions So far, nobody calls functions parse_crashkernel_high() and parse_crashkernel_low(), remove both of them. 
Link: https://lkml.kernel.org/r/20230914033142.676708-10-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Zhen Lei Cc: Catalin Marinas Cc: Chen Jiahao Signed-off-by: Andrew Morton --- include/linux/crash_core.h | 4 ---- kernel/crash_core.c | 18 ------------------ 2 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 3c735a7e33fb..3426f6eef60b 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -96,10 +96,6 @@ void final_note(Elf_Word *buf); int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, unsigned long long *low_size, bool *high); -int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE diff --git a/kernel/crash_core.c b/kernel/crash_core.c index fa8808c4f00e..efe87d501c8c 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -350,24 +350,6 @@ int __init parse_crashkernel(char *cmdline, return ret; } -int __init parse_crashkernel_high(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - suffix_tbl[SUFFIX_HIGH]); -} - -int __init parse_crashkernel_low(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - suffix_tbl[SUFFIX_LOW]); -} - /* * Add a dummy early_param handler to mark crashkernel= as a known command line * parameter and suppress incorrect warnings in 
init/main.c. -- cgit v1.2.3 From 2cb1f6e9a743af58a23cf14563b5eada1e0d3fde Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Sep 2023 22:36:00 +0200 Subject: rcu: Conditionally build CPU-hotplug teardown callbacks Among the three CPU-hotplug teardown RCU callbacks, two of them early exit if CONFIG_HOTPLUG_CPU=n, and one is left unchanged. In any case all of them have an implementation when CONFIG_HOTPLUG_CPU=n. Align instead with the common way to deal with CPU-hotplug teardown callbacks and provide a proper stub when they are not supported. Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker --- include/linux/rcutree.h | 11 ++++- kernel/rcu/tree.c | 114 +++++++++++++++++++++++------------------------- 2 files changed, 63 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 153cfc7bbffd..46875c4e9f56 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -110,9 +110,16 @@ void rcu_all_qs(void); /* RCUtree hotplug events */ int rcutree_prepare_cpu(unsigned int cpu); int rcutree_online_cpu(unsigned int cpu); -int rcutree_offline_cpu(unsigned int cpu); +void rcu_cpu_starting(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU int rcutree_dead_cpu(unsigned int cpu); int rcutree_dying_cpu(unsigned int cpu); -void rcu_cpu_starting(unsigned int cpu); +int rcutree_offline_cpu(unsigned int cpu); +#else +#define rcutree_dead_cpu NULL +#define rcutree_dying_cpu NULL +#define rcutree_offline_cpu NULL +#endif #endif /* __LINUX_RCUTREE_H */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2e1e7eadf2cc..f9c6b2680cbb 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4237,25 +4237,6 @@ static bool rcu_init_invoked(void) return !!rcu_state.n_online_cpus; } -/* - * Near the end of the offline process. Trace the fact that this CPU - * is going offline. 
- */ -int rcutree_dying_cpu(unsigned int cpu) -{ - bool blkd; - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - struct rcu_node *rnp = rdp->mynode; - - if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) - return 0; - - blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask); - trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), - blkd ? TPS("cpuofl-bgp") : TPS("cpuofl")); - return 0; -} - /* * All CPUs for the specified rcu_node structure have gone offline, * and all tasks that were preempted within an RCU read-side critical @@ -4301,23 +4282,6 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) } } -/* - * The CPU has been completely removed, and some other CPU is reporting - * this fact from process context. Do the remainder of the cleanup. - * There can only be one CPU hotplug operation at a time, so no need for - * explicit locking. - */ -int rcutree_dead_cpu(unsigned int cpu) -{ - if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) - return 0; - - WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1); - // Stop-machine done, so allow nohz_full to disable tick. - tick_dep_clear(TICK_DEP_BIT_RCU); - return 0; -} - /* * Propagate ->qsinitmask bits up the rcu_node tree to account for the * first CPU in a given leaf rcu_node structure coming online. The caller @@ -4470,29 +4434,6 @@ int rcutree_online_cpu(unsigned int cpu) return 0; } -/* - * Near the beginning of the process. The CPU is still very much alive - * with pretty much all services enabled. 
- */ -int rcutree_offline_cpu(unsigned int cpu) -{ - unsigned long flags; - struct rcu_data *rdp; - struct rcu_node *rnp; - - rdp = per_cpu_ptr(&rcu_data, cpu); - rnp = rdp->mynode; - raw_spin_lock_irqsave_rcu_node(rnp, flags); - rnp->ffmask &= ~rdp->grpmask; - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - - rcutree_affinity_setting(cpu, cpu); - - // nohz_full CPUs need the tick for stop-machine to work quickly - tick_dep_set(TICK_DEP_BIT_RCU); - return 0; -} - /* * Mark the specified CPU as being online so that subsequent grace periods * (both expedited and normal) will wait on it. Note that this means that @@ -4646,7 +4587,60 @@ void rcutree_migrate_callbacks(int cpu) cpu, rcu_segcblist_n_cbs(&rdp->cblist), rcu_segcblist_first_cb(&rdp->cblist)); } -#endif + +/* + * The CPU has been completely removed, and some other CPU is reporting + * this fact from process context. Do the remainder of the cleanup. + * There can only be one CPU hotplug operation at a time, so no need for + * explicit locking. + */ +int rcutree_dead_cpu(unsigned int cpu) +{ + WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1); + // Stop-machine done, so allow nohz_full to disable tick. + tick_dep_clear(TICK_DEP_BIT_RCU); + return 0; +} + +/* + * Near the end of the offline process. Trace the fact that this CPU + * is going offline. + */ +int rcutree_dying_cpu(unsigned int cpu) +{ + bool blkd; + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + struct rcu_node *rnp = rdp->mynode; + + blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask); + trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq), + blkd ? TPS("cpuofl-bgp") : TPS("cpuofl")); + return 0; +} + +/* + * Near the beginning of the process. The CPU is still very much alive + * with pretty much all services enabled. 
+ */ +int rcutree_offline_cpu(unsigned int cpu) +{ + unsigned long flags; + struct rcu_data *rdp; + struct rcu_node *rnp; + + rdp = per_cpu_ptr(&rcu_data, cpu); + rnp = rdp->mynode; + raw_spin_lock_irqsave_rcu_node(rnp, flags); + rnp->ffmask &= ~rdp->grpmask; + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + + rcutree_affinity_setting(cpu, cpu); + + // nohz_full CPUs need the tick for stop-machine to work quickly + tick_dep_set(TICK_DEP_BIT_RCU); + return 0; +} +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ /* * On non-huge systems, use expedited RCU grace periods to make suspend -- cgit v1.2.3 From 448e9f34d91d1a4799fdb06a93c2c24b34b6fd9d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Sep 2023 22:36:01 +0200 Subject: rcu: Standardize explicit CPU-hotplug calls rcu_report_dead() and rcutree_migrate_callbacks() have their headers in rcupdate.h while those are pure rcutree calls, like the other CPU-hotplug functions. Also rcu_cpu_starting() and rcu_report_dead() have different naming conventions while they mirror each other's effects. Fix the headers and propose a naming that relates both functions and aligns with the prefix of other rcutree CPU-hotplug functions. Reviewed-by: Paul E. 
McKenney Signed-off-by: Frederic Weisbecker --- .../Expedited-Grace-Periods/Expedited-Grace-Periods.rst | 2 +- Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg | 4 ++-- Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg | 4 ++-- Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg | 4 ++-- Documentation/RCU/Design/Requirements/Requirements.rst | 4 ++-- arch/arm64/kernel/smp.c | 4 ++-- arch/powerpc/kernel/smp.c | 2 +- arch/s390/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- include/linux/interrupt.h | 2 +- include/linux/rcupdate.h | 2 -- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 7 ++++++- kernel/cpu.c | 6 +++--- kernel/rcu/tree.c | 12 ++++++++---- 15 files changed, 33 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst index 93d899d53258..414f8a2012d6 100644 --- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst +++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst @@ -181,7 +181,7 @@ operations is carried out at several levels: of this wait (or series of waits, as the case may be) is to permit a concurrent CPU-hotplug operation to complete. #. In the case of RCU-sched, one of the last acts of an outgoing CPU is - to invoke ``rcu_report_dead()``, which reports a quiescent state for + to invoke ``rcutree_report_cpu_dead()``, which reports a quiescent state for that CPU. However, this is likely paranoia-induced redundancy. 
+-----------------------------------------------------------------------+ diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg index 7ddc094d7f28..d82a77d03d8c 100644 --- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg +++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg @@ -1135,7 +1135,7 @@ font-weight="bold" font-size="192" id="text202-7-5-3-27-6-5" - style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_report_dead() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead() rcu_cpu_starting() + xml:space="preserve">rcutree_report_cpu_starting() rcu_report_dead() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead() rcu_cpu_starting() + xml:space="preserve">rcutree_report_cpu_starting() rcu_report_dead() + style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead() rcu_cpu_starting() + xml:space="preserve">rcutree_report_cpu_starting() setup_cpu) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index a4edb7ea66ea..214a1b67f80a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -898,7 +898,7 @@ static void smp_start_secondary(void *cpuvoid) S390_lowcore.restart_flags = 0; restore_access_regs(S390_lowcore.access_regs_save_area); cpu_init(); - rcu_cpu_starting(cpu); + rcutree_report_cpu_starting(cpu); init_cpu_timer(); vtime_init(); vdso_getcpu_init(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4e45ff44aa07..4ccb76f89af8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -288,7 +288,7 @@ static void 
notrace start_secondary(void *unused) cpu_init(); fpu__init_cpu(); - rcu_cpu_starting(raw_smp_processor_id()); + rcutree_report_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); ap_starting(); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a92bce40b04b..d05e1e9a553c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -566,7 +566,7 @@ enum * * _ RCU: * 1) rcutree_migrate_callbacks() migrates the queue. - * 2) rcu_report_dead() reports the final quiescent states. + * 2) rcutree_report_cpu_dead() reports the final quiescent states. * * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index aa351ddcbe8d..f7206b2623c9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -122,8 +122,6 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); -void rcu_report_dead(void); -void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_TASKS_RCU_GENERIC void rcu_init_tasks_generic(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 7b949292908a..d9ac7b136aea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -171,6 +171,6 @@ static inline void rcu_all_qs(void) { barrier(); } #define rcutree_offline_cpu NULL #define rcutree_dead_cpu NULL #define rcutree_dying_cpu NULL -static inline void rcu_cpu_starting(unsigned int cpu) { } +static inline void rcutree_report_cpu_starting(unsigned int cpu) { } #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 46875c4e9f56..254244202ea9 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -110,7 +110,7 @@ void rcu_all_qs(void); /* RCUtree hotplug events */ int rcutree_prepare_cpu(unsigned int cpu); int rcutree_online_cpu(unsigned int cpu); -void 
rcu_cpu_starting(unsigned int cpu); +void rcutree_report_cpu_starting(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU int rcutree_dead_cpu(unsigned int cpu); @@ -122,4 +122,9 @@ int rcutree_offline_cpu(unsigned int cpu); #define rcutree_offline_cpu NULL #endif +void rcutree_migrate_callbacks(int cpu); + +/* Called from hotplug and also arm64 early secondary boot failure */ +void rcutree_report_cpu_dead(void); + #endif /* __LINUX_RCUTREE_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 076e75fed8bb..2491766e1fd5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1388,10 +1388,10 @@ void cpuhp_report_idle_dead(void) struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); BUG_ON(st->state != CPUHP_AP_OFFLINE); - rcu_report_dead(); + rcutree_report_cpu_dead(); st->state = CPUHP_AP_IDLE_DEAD; /* - * We cannot call complete after rcu_report_dead() so we delegate it + * We cannot call complete after rcutree_report_cpu_dead() so we delegate it * to an online cpu. */ smp_call_function_single(cpumask_first(cpu_online_mask), @@ -1617,7 +1617,7 @@ void notify_cpu_starting(unsigned int cpu) struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); - rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ + rcutree_report_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ cpumask_set_cpu(cpu, &cpus_booted_once_mask); /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f9c6b2680cbb..36d8818eaec1 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4216,7 +4216,7 @@ bool rcu_lockdep_current_cpu_online(void) rdp = this_cpu_ptr(&rcu_data); /* * Strictly, we care here about the case where the current CPU is - * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask + * in rcutree_report_cpu_starting() and thus has an excuse for rdp->grpmask * not being up to date. 
So arch_spin_is_locked() might have a * false positive if it's held by some *other* CPU, but that's * OK because that just means a false *negative* on the warning. @@ -4445,8 +4445,10 @@ int rcutree_online_cpu(unsigned int cpu) * from the incoming CPU rather than from the cpuhp_step mechanism. * This is because this function must be invoked at a precise location. * This incoming CPU must not have enabled interrupts yet. + * + * This mirrors the effects of rcutree_report_cpu_dead(). */ -void rcu_cpu_starting(unsigned int cpu) +void rcutree_report_cpu_starting(unsigned int cpu) { unsigned long mask; struct rcu_data *rdp; @@ -4500,8 +4502,10 @@ void rcu_cpu_starting(unsigned int cpu) * Note that this function is special in that it is invoked directly * from the outgoing CPU rather than from the cpuhp_step mechanism. * This is because this function must be invoked at a precise location. + * + * This mirrors the effect of rcutree_report_cpu_starting(). */ -void rcu_report_dead(void) +void rcutree_report_cpu_dead(void) { unsigned long flags; unsigned long mask; @@ -5072,7 +5076,7 @@ void __init rcu_init(void) pm_notifier(rcu_pm_notify, 0); WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot. rcutree_prepare_cpu(cpu); - rcu_cpu_starting(cpu); + rcutree_report_cpu_starting(cpu); rcutree_online_cpu(cpu); /* Create workqueue for Tree SRCU and for expedited GPs. */ -- cgit v1.2.3 From 5790b1fb3d672d9a1fe3881a7181dfdbe741568f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 4 Oct 2023 16:50:07 -0400 Subject: eventfs: Remove eventfs_file and just use eventfs_inode Instead of having a descriptor for every file represented in the eventfs directory, only have the directory itself represented. Change the API to send in a list of entries that represent all the files in the directory (but not other directories). The entry list contains a name and a callback function that will be used to create the files when they are accessed. 
struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, const struct eventfs_entry *entries, int size, void *data); is used for the top level eventfs directory, and returns an eventfs_inode that will be used by: struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, const struct eventfs_entry *entries, int size, void *data); where both of the above take an array of struct eventfs_entry entries for every file that is in the directory. The entries are defined by: typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, const struct file_operations **fops); struct eventfs_entry { const char *name; eventfs_callback callback; }; Where the name is the name of the file and the callback gets called when the file is being created. The callback passes in the name (in case the same callback is used for multiple files), a pointer to the mode, data and fops. The data will be pointing to the data that was passed in eventfs_create_dir() or eventfs_create_events_dir() but may be overridden to point to something else, as it will be used to point to the inode->i_private that is created. The information passed back from the callback is used to create the dentry/inode. If the callback fills the data and the file should be created, it must return a positive number. On zero or negative, the file is ignored. This logic may also be used as a prototype to convert entire pseudo file systems into just-in-time allocation. The "show_events_dentry" file has been updated to show the directories, and any files they have. 
With just the eventfs_file allocations: Before after deltas for meminfo (in kB): MemFree: -14360 MemAvailable: -14260 Buffers: 40 Cached: 24 Active: 44 Inactive: 48 Inactive(anon): 28 Active(file): 44 Inactive(file): 20 Dirty: -4 AnonPages: 28 Mapped: 4 KReclaimable: 132 Slab: 1604 SReclaimable: 132 SUnreclaim: 1472 Committed_AS: 12 Before after deltas for slabinfo: <slab>: <objects> [ * <size> = <total> ] ext4_inode_cache 27 [* 1184 = 31968 ] extent_status 102 [* 40 = 4080 ] tracefs_inode_cache 144 [* 656 = 94464 ] buffer_head 39 [* 104 = 4056 ] shmem_inode_cache 49 [* 800 = 39200 ] filp -53 [* 256 = -13568 ] dentry 251 [* 192 = 48192 ] lsm_file_cache 277 [* 32 = 8864 ] vm_area_struct -14 [* 184 = -2576 ] trace_event_file 1748 [* 88 = 153824 ] kmalloc-1k 35 [* 1024 = 35840 ] kmalloc-256 49 [* 256 = 12544 ] kmalloc-192 -28 [* 192 = -5376 ] kmalloc-128 -30 [* 128 = -3840 ] kmalloc-96 10581 [* 96 = 1015776 ] kmalloc-64 3056 [* 64 = 195584 ] kmalloc-32 1291 [* 32 = 41312 ] kmalloc-16 2310 [* 16 = 36960 ] kmalloc-8 9216 [* 8 = 73728 ] Free memory dropped by 14,360 kB Available memory dropped by 14,260 kB Total slab additions in size: 1,771,032 bytes With this change: Before after deltas for meminfo (in kB): MemFree: -12084 MemAvailable: -11976 Buffers: 32 Cached: 32 Active: 72 Inactive: 168 Inactive(anon): 176 Active(file): 72 Inactive(file): -8 Dirty: 24 AnonPages: 196 Mapped: 8 KReclaimable: 148 Slab: 836 SReclaimable: 148 SUnreclaim: 688 Committed_AS: 324 Before after deltas for slabinfo: <slab>: <objects> [ * <size> = <total> ] tracefs_inode_cache 144 [* 656 = 94464 ] shmem_inode_cache -23 [* 800 = -18400 ] filp -92 [* 256 = -23552 ] dentry 179 [* 192 = 34368 ] lsm_file_cache -3 [* 32 = -96 ] vm_area_struct -13 [* 184 = -2392 ] trace_event_file 1748 [* 88 = 153824 ] kmalloc-1k -49 [* 1024 = -50176 ] kmalloc-256 -27 [* 256 = -6912 ] kmalloc-128 1864 [* 128 = 238592 ] kmalloc-64 4685 [* 64 = 299840 ] kmalloc-32 -72 [* 32 = -2304 ] kmalloc-16 256 [* 16 = 4096 ] total = 721352 Free memory dropped by 12,084 kB Available memory 
dropped by 11,976 kB Total slab additions in size: 721,352 bytes That's over 2 MB in savings per instance for free and available memory, and over 1 MB in savings per instance of slab memory. Link: https://lore.kernel.org/linux-trace-kernel/20231003184059.4924468e@gandalf.local.home Link: https://lore.kernel.org/linux-trace-kernel/20231004165007.43d79161@gandalf.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ajay Kaher Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 847 ++++++++++++++++++++++--------------------- fs/tracefs/inode.c | 2 +- fs/tracefs/internal.h | 37 +- include/linux/trace_events.h | 2 +- include/linux/tracefs.h | 29 +- kernel/trace/trace.c | 7 +- kernel/trace/trace.h | 4 +- kernel/trace/trace_events.c | 313 +++++++++++----- 8 files changed, 705 insertions(+), 536 deletions(-) (limited to 'include/linux') diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 8c8d64e76103..eab18b157ef5 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -2,8 +2,9 @@ /* * event_inode.c - part of tracefs, a pseudo file system for activating tracing * - * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt (VMware) + * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher + * Copyright (C) 2023 Google, author: Steven Rostedt * * eventfs is used to dynamically create inodes and dentries based on the * meta data provided by the tracing system. @@ -23,46 +24,6 @@ #include #include "internal.h" -struct eventfs_inode { - struct list_head e_top_files; -}; - -/* - * struct eventfs_file - hold the properties of the eventfs files and - * directories. 
- * @name: the name of the file or directory to create - * @d_parent: holds parent's dentry - * @dentry: once accessed holds dentry - * @list: file or directory to be added to parent directory - * @ei: list of files and directories within directory - * @fop: file_operations for file or directory - * @iop: inode_operations for file or directory - * @data: something that the caller will want to get to later on - * @mode: the permission that the file or directory should have - */ -struct eventfs_file { - const char *name; - struct dentry *d_parent; - struct dentry *dentry; - struct list_head list; - struct eventfs_inode *ei; - const struct file_operations *fop; - const struct inode_operations *iop; - /* - * Union - used for deletion - * @del_list: list of eventfs_file to delete - * @rcu: eventfs_file to delete in RCU - * @is_freed: node is freed if one of the above is set - */ - union { - struct list_head del_list; - struct rcu_head rcu; - unsigned long is_freed; - }; - void *data; - umode_t mode; -}; - static DEFINE_MUTEX(eventfs_mutex); DEFINE_STATIC_SRCU(eventfs_srcu); @@ -93,16 +54,9 @@ static const struct file_operations eventfs_file_operations = { * @data: something that the caller will want to get to later on. * @fop: struct file_operations that should be used for this file. * - * This is the basic "create a file" function for tracefs. It allows for a - * wide range of flexibility in creating a file. - * - * This function will return a pointer to a dentry if it succeeds. This - * pointer must be passed to the tracefs_remove() function when the file is - * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. - * - * If tracefs is not enabled in the kernel, the value -%ENODEV will be - * returned. + * This function creates a dentry that represents a file in the eventsfs_inode + * directory. The inode.i_private pointer will point to @data in the open() + * call. 
*/ static struct dentry *create_file(const char *name, umode_t mode, struct dentry *parent, void *data, @@ -118,6 +72,7 @@ static struct dentry *create_file(const char *name, umode_t mode, if (WARN_ON_ONCE(!S_ISREG(mode))) return NULL; + WARN_ON_ONCE(!parent); dentry = eventfs_start_creating(name, parent); if (IS_ERR(dentry)) @@ -142,20 +97,11 @@ static struct dentry *create_file(const char *name, umode_t mode, * create_dir - create a dir in the tracefs filesystem * @name: the name of the file to create. * @parent: parent dentry for this file. - * @data: something that the caller will want to get to later on. - * - * This is the basic "create a dir" function for eventfs. It allows for a - * wide range of flexibility in creating a dir. - * - * This function will return a pointer to a dentry if it succeeds. This - * pointer must be passed to the tracefs_remove() function when the file is - * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. * - * If tracefs is not enabled in the kernel, the value -%ENODEV will be - * returned. + * This function will create a dentry for a directory represented by + * a eventfs_inode. 
*/ -static struct dentry *create_dir(const char *name, struct dentry *parent, void *data) +static struct dentry *create_dir(const char *name, struct dentry *parent) { struct tracefs_inode *ti; struct dentry *dentry; @@ -172,7 +118,6 @@ static struct dentry *create_dir(const char *name, struct dentry *parent, void * inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_op = &eventfs_root_dir_inode_operations; inode->i_fop = &eventfs_file_operations; - inode->i_private = data; ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; @@ -185,18 +130,18 @@ static struct dentry *create_dir(const char *name, struct dentry *parent, void * } /** - * eventfs_set_ef_status_free - set the ef->status to free + * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode * @ti: the tracefs_inode of the dentry - * @dentry: dentry who's status to be freed + * @dentry: dentry which has the reference to remove. * - * eventfs_set_ef_status_free will be called if no more - * references remain + * Remove the association between a dentry from an eventfs_inode. */ -void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry) +void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) { struct tracefs_inode *ti_parent; + struct eventfs_inode *ei_child, *tmp; struct eventfs_inode *ei; - struct eventfs_file *ef, *tmp; + int i; /* The top level events directory may be freed by this */ if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) { @@ -207,9 +152,9 @@ void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry) ei = ti->private; /* Record all the top level files */ - list_for_each_entry_srcu(ef, &ei->e_top_files, list, + list_for_each_entry_srcu(ei_child, &ei->children, list, lockdep_is_held(&eventfs_mutex)) { - list_add_tail(&ef->del_list, &ef_del_list); + list_add_tail(&ei_child->del_list, &ef_del_list); } /* Nothing should access this, but just in case! 
*/ @@ -218,11 +163,13 @@ void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry) mutex_unlock(&eventfs_mutex); /* Now safely free the top level files and their children */ - list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) { - list_del(&ef->del_list); - eventfs_remove(ef); + list_for_each_entry_safe(ei_child, tmp, &ef_del_list, del_list) { + list_del(&ei_child->del_list); + eventfs_remove_dir(ei_child); } + kfree_const(ei->name); + kfree(ei->d_children); kfree(ei); return; } @@ -233,68 +180,162 @@ void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry) if (!ti_parent || !(ti_parent->flags & TRACEFS_EVENT_INODE)) goto out; - ef = dentry->d_fsdata; - if (!ef) + ei = dentry->d_fsdata; + if (!ei) goto out; /* - * If ef was freed, then the LSB bit is set for d_fsdata. + * If ei was freed, then the LSB bit is set for d_fsdata. * But this should not happen, as it should still have a * ref count that prevents it. Warn in case it does. */ - if (WARN_ON_ONCE((unsigned long)ef & 1)) + if (WARN_ON_ONCE((unsigned long)ei & 1)) goto out; + /* This could belong to one of the files of the ei */ + if (ei->dentry != dentry) { + for (i = 0; i < ei->nr_entries; i++) { + if (ei->d_children[i] == dentry) + break; + } + if (WARN_ON_ONCE(i == ei->nr_entries)) + goto out; + ei->d_children[i] = NULL; + } else { + ei->dentry = NULL; + } + dentry->d_fsdata = NULL; - ef->dentry = NULL; -out: + out: mutex_unlock(&eventfs_mutex); } +/** + * create_file_dentry - create a dentry for a file of an eventfs_inode + * @ei: the eventfs_inode that the file will be created under + * @e_dentry: a pointer to the d_children[] of the @ei + * @parent: The parent dentry of the created file. + * @name: The name of the file to create + * @mode: The mode of the file. + * @data: The data to use to set the inode of the file with on open() + * @fops: The fops of the file to be created. 
+ * @lookup: If called by the lookup routine, in which case, dput() the created dentry. + * + * Create a dentry for a file of an eventfs_inode @ei and place it into the + * address located at @e_dentry. If the @e_dentry already has a dentry, then + * just do a dget() on it and return. Otherwise create the dentry and attach it. + */ +static struct dentry * +create_file_dentry(struct eventfs_inode *ei, struct dentry **e_dentry, + struct dentry *parent, const char *name, umode_t mode, void *data, + const struct file_operations *fops, bool lookup) +{ + struct dentry *dentry; + bool invalidate = false; + + mutex_lock(&eventfs_mutex); + /* If the e_dentry already has a dentry, use it */ + if (*e_dentry) { + /* lookup does not need to up the ref count */ + if (!lookup) + dget(*e_dentry); + mutex_unlock(&eventfs_mutex); + return *e_dentry; + } + mutex_unlock(&eventfs_mutex); + + /* The lookup already has the parent->d_inode locked */ + if (!lookup) + inode_lock(parent->d_inode); + + dentry = create_file(name, mode, parent, data, fops); + + if (!lookup) + inode_unlock(parent->d_inode); + + mutex_lock(&eventfs_mutex); + + if (IS_ERR_OR_NULL(dentry)) { + /* + * When the mutex was released, something else could have + * created the dentry for this e_dentry. In which case + * use that one. + * + * Note, with the mutex held, the e_dentry cannot have content + * and the ei->is_freed be true at the same time. + */ + WARN_ON_ONCE(ei->is_freed); + dentry = *e_dentry; + /* The lookup does not need to up the dentry refcount */ + if (dentry && !lookup) + dget(dentry); + mutex_unlock(&eventfs_mutex); + return dentry; + } + + if (!*e_dentry && !ei->is_freed) { + *e_dentry = dentry; + dentry->d_fsdata = ei; + } else { + /* + * Should never happen unless we get here due to being freed. + * Otherwise it means two dentries exist with the same name. 
+ */ + WARN_ON_ONCE(!ei->is_freed); + invalidate = true; + } + mutex_unlock(&eventfs_mutex); + + if (invalidate) + d_invalidate(dentry); + + if (lookup || invalidate) + dput(dentry); + + return invalidate ? NULL : dentry; +} + /** * eventfs_post_create_dir - post create dir routine - * @ef: eventfs_file of recently created dir + * @ei: eventfs_inode of recently created dir * * Map the meta-data of files within an eventfs dir to their parent dentry */ -static void eventfs_post_create_dir(struct eventfs_file *ef) +static void eventfs_post_create_dir(struct eventfs_inode *ei) { - struct eventfs_file *ef_child; + struct eventfs_inode *ei_child; struct tracefs_inode *ti; /* srcu lock already held */ /* fill parent-child relation */ - list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list, + list_for_each_entry_srcu(ei_child, &ei->children, list, srcu_read_lock_held(&eventfs_srcu)) { - ef_child->d_parent = ef->dentry; + ei_child->d_parent = ei->dentry; } - ti = get_tracefs(ef->dentry->d_inode); - ti->private = ef->ei; + ti = get_tracefs(ei->dentry->d_inode); + ti->private = ei; } /** - * create_dentry - helper function to create dentry - * @ef: eventfs_file of file or directory to create - * @parent: parent dentry - * @lookup: true if called from lookup routine + * create_dir_dentry - Create a directory dentry for the eventfs_inode + * @ei: The eventfs_inode to create the directory for + * @parent: The dentry of the parent of this directory + * @lookup: True if this is called by the lookup code * - * Used to create a dentry for file/dir, executes post dentry creation routine + * This creates and attaches a directory dentry to the eventfs_inode @ei. 
*/ static struct dentry * -create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup) +create_dir_dentry(struct eventfs_inode *ei, struct dentry *parent, bool lookup) { bool invalidate = false; - struct dentry *dentry; + struct dentry *dentry = NULL; mutex_lock(&eventfs_mutex); - if (ef->is_freed) { - mutex_unlock(&eventfs_mutex); - return NULL; - } - if (ef->dentry) { - dentry = ef->dentry; - /* On dir open, up the ref count */ + if (ei->dentry) { + /* If the dentry already has a dentry, use it */ + dentry = ei->dentry; + /* lookup does not need to up the ref count */ if (!lookup) dget(dentry); mutex_unlock(&eventfs_mutex); @@ -302,42 +343,44 @@ create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup) } mutex_unlock(&eventfs_mutex); + /* The lookup already has the parent->d_inode locked */ if (!lookup) inode_lock(parent->d_inode); - if (ef->ei) - dentry = create_dir(ef->name, parent, ef->data); - else - dentry = create_file(ef->name, ef->mode, parent, - ef->data, ef->fop); + dentry = create_dir(ei->name, parent); if (!lookup) inode_unlock(parent->d_inode); mutex_lock(&eventfs_mutex); - if (IS_ERR_OR_NULL(dentry)) { - /* If the ef was already updated get it */ - dentry = ef->dentry; + + if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { + /* + * When the mutex was released, something else could have + * created the dentry for this e_dentry. In which case + * use that one. + * + * Note, with the mutex held, the e_dentry cannot have content + * and the ei->is_freed be true at the same time. 
+ */ + dentry = ei->dentry; if (dentry && !lookup) dget(dentry); mutex_unlock(&eventfs_mutex); return dentry; } - if (!ef->dentry && !ef->is_freed) { - ef->dentry = dentry; - if (ef->ei) - eventfs_post_create_dir(ef); - dentry->d_fsdata = ef; + if (!ei->dentry && !ei->is_freed) { + ei->dentry = dentry; + eventfs_post_create_dir(ei); + dentry->d_fsdata = ei; } else { - /* A race here, should try again (unless freed) */ - invalidate = true; - /* * Should never happen unless we get here due to being freed. * Otherwise it means two dentries exist with the same name. */ - WARN_ON_ONCE(!ef->is_freed); + WARN_ON_ONCE(!ei->is_freed); + invalidate = true; } mutex_unlock(&eventfs_mutex); if (invalidate) @@ -349,50 +392,85 @@ create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup) return invalidate ? NULL : dentry; } -static bool match_event_file(struct eventfs_file *ef, const char *name) -{ - bool ret; - - mutex_lock(&eventfs_mutex); - ret = !ef->is_freed && strcmp(ef->name, name) == 0; - mutex_unlock(&eventfs_mutex); - - return ret; -} - /** * eventfs_root_lookup - lookup routine to create file/dir * @dir: in which a lookup is being done * @dentry: file/dir dentry - * @flags: to pass as flags parameter to simple lookup + * @flags: Just passed to simple_lookup() * - * Used to create a dynamic file/dir within @dir. Use the eventfs_inode - * list of meta data to find the information needed to create the file/dir. 
+ * Used to create dynamic file/dir with-in @dir, search with-in @ei + * list, if @dentry found go ahead and create the file/dir */ + static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { + const struct file_operations *fops; + const struct eventfs_entry *entry; + struct eventfs_inode *ei_child; struct tracefs_inode *ti; struct eventfs_inode *ei; - struct eventfs_file *ef; + struct dentry *ei_dentry = NULL; struct dentry *ret = NULL; + const char *name = dentry->d_name.name; + bool created = false; + umode_t mode; + void *data; int idx; + int i; + int r; ti = get_tracefs(dir); if (!(ti->flags & TRACEFS_EVENT_INODE)) return NULL; - ei = ti->private; + /* Grab srcu to prevent the ei from going away */ idx = srcu_read_lock(&eventfs_srcu); - list_for_each_entry_srcu(ef, &ei->e_top_files, list, + + /* + * Grab the eventfs_mutex to consistent value from ti->private. + * This s + */ + mutex_lock(&eventfs_mutex); + ei = READ_ONCE(ti->private); + if (ei) + ei_dentry = READ_ONCE(ei->dentry); + mutex_unlock(&eventfs_mutex); + + if (!ei || !ei_dentry) + goto out; + + data = ei->data; + + list_for_each_entry_srcu(ei_child, &ei->children, list, srcu_read_lock_held(&eventfs_srcu)) { - if (!match_event_file(ef, dentry->d_name.name)) + if (strcmp(ei_child->name, name) != 0) continue; ret = simple_lookup(dir, dentry, flags); - create_dentry(ef, ef->d_parent, true); + create_dir_dentry(ei_child, ei_dentry, true); + created = true; break; } + + if (created) + goto out; + + for (i = 0; i < ei->nr_entries; i++) { + entry = &ei->entries[i]; + if (strcmp(name, entry->name) == 0) { + void *cdata = data; + r = entry->callback(name, &mode, &cdata, &fops); + if (r <= 0) + continue; + ret = simple_lookup(dir, dentry, flags); + create_file_dentry(ei, &ei->d_children[i], + ei_dentry, name, mode, cdata, + fops, true); + break; + } + } + out: srcu_read_unlock(&eventfs_srcu, idx); return ret; } @@ -432,29 +510,48 @@ static int 
eventfs_release(struct inode *inode, struct file *file) return dcache_dir_close(inode, file); } +static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) +{ + struct dentry **tmp; + + tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL); + if (!tmp) + return -1; + tmp[cnt] = d; + tmp[cnt + 1] = NULL; + *dentries = tmp; + return 0; +} + /** * dcache_dir_open_wrapper - eventfs open wrapper * @inode: not used - * @file: dir to be opened (to create its child) + * @file: dir to be opened (to create it's children) * - * Used to dynamically create the file/dir within @file. @file is really a - * directory and all the files/dirs of the children within @file will be - * created. If any of the files/dirs have already been created, their - * reference count will be incremented. + * Used to dynamic create file/dir with-in @file, all the + * file/dir will be created. If already created then references + * will be increased */ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) { + const struct file_operations *fops; + const struct eventfs_entry *entry; + struct eventfs_inode *ei_child; struct tracefs_inode *ti; struct eventfs_inode *ei; - struct eventfs_file *ef; struct dentry_list *dlist; struct dentry **dentries = NULL; - struct dentry *dentry = file_dentry(file); + struct dentry *parent = file_dentry(file); struct dentry *d; struct inode *f_inode = file_inode(file); + const char *name = parent->d_name.name; + umode_t mode; + void *data; int cnt = 0; int idx; int ret; + int i; + int r; ti = get_tracefs(f_inode); if (!(ti->flags & TRACEFS_EVENT_INODE)) @@ -463,25 +560,51 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) if (WARN_ON_ONCE(file->private_data)) return -EINVAL; + idx = srcu_read_lock(&eventfs_srcu); + + mutex_lock(&eventfs_mutex); + ei = READ_ONCE(ti->private); + mutex_unlock(&eventfs_mutex); + + if (!ei) { + srcu_read_unlock(&eventfs_srcu, idx); + return -EINVAL; + } + + + data = 
ei->data; + dlist = kmalloc(sizeof(*dlist), GFP_KERNEL); - if (!dlist) + if (!dlist) { + srcu_read_unlock(&eventfs_srcu, idx); return -ENOMEM; + } - ei = ti->private; - idx = srcu_read_lock(&eventfs_srcu); - list_for_each_entry_srcu(ef, &ei->e_top_files, list, + list_for_each_entry_srcu(ei_child, &ei->children, list, srcu_read_lock_held(&eventfs_srcu)) { - d = create_dentry(ef, dentry, false); + d = create_dir_dentry(ei_child, parent, false); if (d) { - struct dentry **tmp; + ret = add_dentries(&dentries, d, cnt); + if (ret < 0) + break; + cnt++; + } + } - tmp = krealloc(dentries, sizeof(d) * (cnt + 2), GFP_KERNEL); - if (!tmp) + for (i = 0; i < ei->nr_entries; i++) { + void *cdata = data; + entry = &ei->entries[i]; + name = entry->name; + r = entry->callback(name, &mode, &cdata, &fops); + if (r <= 0) + continue; + d = create_file_dentry(ei, &ei->d_children[i], + parent, name, mode, cdata, fops, false); + if (d) { + ret = add_dentries(&dentries, d, cnt); + if (ret < 0) break; - tmp[cnt] = d; - tmp[cnt + 1] = NULL; cnt++; - dentries = tmp; } } srcu_read_unlock(&eventfs_srcu, idx); @@ -514,63 +637,90 @@ static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx) } /** - * eventfs_prepare_ef - helper function to prepare eventfs_file - * @name: the name of the file/directory to create. - * @mode: the permission that the file should have. - * @fop: struct file_operations that should be used for this file/directory. - * @iop: struct inode_operations that should be used for this file/directory. - * @data: something that the caller will want to get to later on. The - * inode.i_private pointer will point to this value on the open() call. + * eventfs_create_dir - Create the eventfs_inode for this directory + * @name: The name of the directory to create. + * @parent: The eventfs_inode of the parent directory. 
+ * @entries: A list of entries that represent the files under this directory + * @size: The number of @entries + * @data: The default data to pass to the files (an entry may override it). + * + * This function creates the descriptor to represent a directory in the + * eventfs. This descriptor is an eventfs_inode, and it is returned to be + * used to create other children underneath. + * + * The @entries is an array of eventfs_entry structures which has: + * const char *name + * eventfs_callback callback; + * + * The name is the name of the file, and the callback is a pointer to a function + * that will be called when the file is reference (either by lookup or by + * reading a directory). The callback is of the prototype: * - * This function allocates and fills the eventfs_file structure. + * int callback(const char *name, umode_t *mode, void **data, + * const struct file_operations **fops); + * + * When a file needs to be created, this callback will be called with + * name = the name of the file being created (so that the same callback + * may be used for multiple files). + * mode = a place to set the file's mode + * data = A pointer to @data, and the callback may replace it, which will + * cause the file created to pass the new data to the open() call. + * fops = the fops to use for the created file. 
*/ -static struct eventfs_file *eventfs_prepare_ef(const char *name, umode_t mode, - const struct file_operations *fop, - const struct inode_operations *iop, - void *data) +struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, + const struct eventfs_entry *entries, + int size, void *data) { - struct eventfs_file *ef; + struct eventfs_inode *ei; - ef = kzalloc(sizeof(*ef), GFP_KERNEL); - if (!ef) + if (!parent) + return ERR_PTR(-EINVAL); + + ei = kzalloc(sizeof(*ei), GFP_KERNEL); + if (!ei) return ERR_PTR(-ENOMEM); - ef->name = kstrdup(name, GFP_KERNEL); - if (!ef->name) { - kfree(ef); + ei->name = kstrdup_const(name, GFP_KERNEL); + if (!ei->name) { + kfree(ei); return ERR_PTR(-ENOMEM); } - if (S_ISDIR(mode)) { - ef->ei = kzalloc(sizeof(*ef->ei), GFP_KERNEL); - if (!ef->ei) { - kfree(ef->name); - kfree(ef); + if (size) { + ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); + if (!ei->d_children) { + kfree_const(ei->name); + kfree(ei); return ERR_PTR(-ENOMEM); } - INIT_LIST_HEAD(&ef->ei->e_top_files); - } else { - ef->ei = NULL; } - ef->iop = iop; - ef->fop = fop; - ef->mode = mode; - ef->data = data; - return ef; + ei->entries = entries; + ei->nr_entries = size; + ei->data = data; + INIT_LIST_HEAD(&ei->children); + + mutex_lock(&eventfs_mutex); + list_add_tail(&ei->list, &parent->children); + ei->d_parent = parent->dentry; + mutex_unlock(&eventfs_mutex); + + return ei; } /** - * eventfs_create_events_dir - create the trace event structure - * @name: the name of the directory to create. - * @parent: parent dentry for this file. This should be a directory dentry - * if set. If this parameter is NULL, then the directory will be - * created in the root of the tracefs filesystem. + * eventfs_create_events_dir - create the top level events directory + * @name: The name of the top level directory to create. + * @parent: Parent dentry for this file in the tracefs directory. 
+ * @entries: A list of entries that represent the files under this directory + * @size: The number of @entries + * @data: The default data to pass to the files (an entry may override it). * * This function creates the top of the trace event directory. */ -struct dentry *eventfs_create_events_dir(const char *name, - struct dentry *parent) +struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, + const struct eventfs_entry *entries, + int size, void *data) { struct dentry *dentry = tracefs_start_creating(name, parent); struct eventfs_inode *ei; @@ -581,19 +731,32 @@ struct dentry *eventfs_create_events_dir(const char *name, return NULL; if (IS_ERR(dentry)) - return dentry; + return (struct eventfs_inode *)dentry; ei = kzalloc(sizeof(*ei), GFP_KERNEL); if (!ei) - return ERR_PTR(-ENOMEM); + goto fail; + inode = tracefs_get_inode(dentry->d_sb); - if (unlikely(!inode)) { - kfree(ei); - tracefs_failed_creating(dentry); - return ERR_PTR(-ENOMEM); + if (unlikely(!inode)) + goto fail; + + if (size) { + ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); + if (!ei->d_children) + goto fail; } - INIT_LIST_HEAD(&ei->e_top_files); + ei->dentry = dentry; + ei->entries = entries; + ei->nr_entries = size; + ei->data = data; + ei->name = kstrdup_const(name, GFP_KERNEL); + if (!ei->name) + goto fail; + + INIT_LIST_HEAD(&ei->children); + INIT_LIST_HEAD(&ei->list); ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; @@ -608,193 +771,41 @@ struct dentry *eventfs_create_events_dir(const char *name, d_instantiate(dentry, inode); inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); - return tracefs_end_creating(dentry); -} + tracefs_end_creating(dentry); -/** - * eventfs_add_subsystem_dir - add eventfs subsystem_dir to list to create later - * @name: the name of the file to create. - * @parent: parent dentry for this dir. 
- * - * This function adds eventfs subsystem dir to list. - * And all these dirs are created on the fly when they are looked up, - * and the dentry and inodes will be removed when they are done. - */ -struct eventfs_file *eventfs_add_subsystem_dir(const char *name, - struct dentry *parent) -{ - struct tracefs_inode *ti_parent; - struct eventfs_inode *ei_parent; - struct eventfs_file *ef; + /* Will call dput when the directory is removed */ + dget(dentry); - if (security_locked_down(LOCKDOWN_TRACEFS)) - return NULL; - - if (!parent) - return ERR_PTR(-EINVAL); - - ti_parent = get_tracefs(parent->d_inode); - ei_parent = ti_parent->private; + return ei; - ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL); - if (IS_ERR(ef)) - return ef; - - mutex_lock(&eventfs_mutex); - list_add_tail(&ef->list, &ei_parent->e_top_files); - ef->d_parent = parent; - mutex_unlock(&eventfs_mutex); - return ef; + fail: + kfree(ei->d_children); + kfree(ei); + tracefs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); } -/** - * eventfs_add_dir - add eventfs dir to list to create later - * @name: the name of the file to create. - * @ef_parent: parent eventfs_file for this dir. - * - * This function adds eventfs dir to list. - * And all these dirs are created on the fly when they are looked up, - * and the dentry and inodes will be removed when they are done. 
- */ -struct eventfs_file *eventfs_add_dir(const char *name, - struct eventfs_file *ef_parent) +static void free_ei(struct rcu_head *head) { - struct eventfs_file *ef; - - if (security_locked_down(LOCKDOWN_TRACEFS)) - return NULL; + struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); - if (!ef_parent) - return ERR_PTR(-EINVAL); - - ef = eventfs_prepare_ef(name, S_IFDIR, NULL, NULL, NULL); - if (IS_ERR(ef)) - return ef; - - mutex_lock(&eventfs_mutex); - list_add_tail(&ef->list, &ef_parent->ei->e_top_files); - ef->d_parent = ef_parent->dentry; - mutex_unlock(&eventfs_mutex); - return ef; -} - -/** - * eventfs_add_events_file - add the data needed to create a file for later reference - * @name: the name of the file to create. - * @mode: the permission that the file should have. - * @parent: parent dentry for this file. - * @data: something that the caller will want to get to later on. - * @fop: struct file_operations that should be used for this file. - * - * This function is used to add the information needed to create a - * dentry/inode within the top level events directory. The file created - * will have the @mode permissions. The @data will be used to fill the - * inode.i_private when the open() call is done. The dentry and inodes are - * all created when they are referenced, and removed when they are no - * longer referenced. 
- */ -int eventfs_add_events_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fop) -{ - struct tracefs_inode *ti; - struct eventfs_inode *ei; - struct eventfs_file *ef; - - if (security_locked_down(LOCKDOWN_TRACEFS)) - return -ENODEV; - - if (!parent) - return -EINVAL; - - if (!(mode & S_IFMT)) - mode |= S_IFREG; - - if (!parent->d_inode) - return -EINVAL; - - ti = get_tracefs(parent->d_inode); - if (!(ti->flags & TRACEFS_EVENT_INODE)) - return -EINVAL; - - ei = ti->private; - ef = eventfs_prepare_ef(name, mode, fop, NULL, data); - - if (IS_ERR(ef)) - return -ENOMEM; - - mutex_lock(&eventfs_mutex); - list_add_tail(&ef->list, &ei->e_top_files); - ef->d_parent = parent; - mutex_unlock(&eventfs_mutex); - return 0; -} - -/** - * eventfs_add_file - add eventfs file to list to create later - * @name: the name of the file to create. - * @mode: the permission that the file should have. - * @ef_parent: parent eventfs_file for this file. - * @data: something that the caller will want to get to later on. - * @fop: struct file_operations that should be used for this file. - * - * This function is used to add the information needed to create a - * file within a subdirectory of the events directory. The file created - * will have the @mode permissions. The @data will be used to fill the - * inode.i_private when the open() call is done. The dentry and inodes are - * all created when they are referenced, and removed when they are no - * longer referenced. 
- */ -int eventfs_add_file(const char *name, umode_t mode, - struct eventfs_file *ef_parent, - void *data, - const struct file_operations *fop) -{ - struct eventfs_file *ef; - - if (security_locked_down(LOCKDOWN_TRACEFS)) - return -ENODEV; - - if (!ef_parent) - return -EINVAL; - - if (!(mode & S_IFMT)) - mode |= S_IFREG; - - ef = eventfs_prepare_ef(name, mode, fop, NULL, data); - if (IS_ERR(ef)) - return -ENOMEM; - - mutex_lock(&eventfs_mutex); - list_add_tail(&ef->list, &ef_parent->ei->e_top_files); - ef->d_parent = ef_parent->dentry; - mutex_unlock(&eventfs_mutex); - return 0; -} - -static void free_ef(struct rcu_head *head) -{ - struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu); - - kfree(ef->name); - kfree(ef->ei); - kfree(ef); + kfree_const(ei->name); + kfree(ei->d_children); + kfree(ei); } /** * eventfs_remove_rec - remove eventfs dir or file from list - * @ef: eventfs_file to be removed. - * @head: to create list of eventfs_file to be deleted - * @level: to check recursion depth + * @ei: eventfs_inode to be removed. * - * The helper function eventfs_remove_rec() is used to clean up and free the - * associated data from eventfs for both of the added functions. + * This function recursively remove eventfs_inode which + * contains info of file or dir. */ -static void eventfs_remove_rec(struct eventfs_file *ef, struct list_head *head, int level) +static void eventfs_remove_rec(struct eventfs_inode *ei, struct list_head *head, int level) { - struct eventfs_file *ef_child; + struct eventfs_inode *ei_child; - if (!ef) + if (!ei) return; /* * Check recursion depth. 
It should never be greater than 3: @@ -806,62 +817,68 @@ static void eventfs_remove_rec(struct eventfs_file *ef, struct list_head *head, if (WARN_ON_ONCE(level > 3)) return; - if (ef->ei) { - /* search for nested folders or files */ - list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list, - lockdep_is_held(&eventfs_mutex)) { - eventfs_remove_rec(ef_child, head, level + 1); - } + /* search for nested folders or files */ + list_for_each_entry_srcu(ei_child, &ei->children, list, + lockdep_is_held(&eventfs_mutex)) { + eventfs_remove_rec(ei_child, head, level + 1); } - list_del_rcu(&ef->list); - list_add_tail(&ef->del_list, head); + list_del_rcu(&ei->list); + list_add_tail(&ei->del_list, head); } +static void unhook_dentry(struct dentry **dentry, struct dentry **list) +{ + if (*dentry) { + unsigned long ptr = (unsigned long)*list; + + /* Keep the dentry from being freed yet */ + dget(*dentry); + + /* + * Paranoid: The dget() above should prevent the dentry + * from being freed and calling eventfs_set_ei_status_free(). + * But just in case, set the link list LSB pointer to 1 + * and have eventfs_set_ei_status_free() check that to + * make sure that if it does happen, it will not think + * the d_fsdata is an eventfs_inode. + * + * For this to work, no eventfs_inode should be allocated + * on a odd space, as the ef should always be allocated + * to be at least word aligned. Check for that too. + */ + WARN_ON_ONCE(ptr & 1); + + (*dentry)->d_fsdata = (void *)(ptr | 1); + *list = *dentry; + *dentry = NULL; + } +} /** * eventfs_remove - remove eventfs dir or file from list - * @ef: eventfs_file to be removed. + * @ei: eventfs_inode to be removed. 
* * This function acquire the eventfs_mutex lock and call eventfs_remove_rec() */ -void eventfs_remove(struct eventfs_file *ef) +void eventfs_remove_dir(struct eventfs_inode *ei) { - struct eventfs_file *tmp; - LIST_HEAD(ef_del_list); + struct eventfs_inode *tmp; + LIST_HEAD(ei_del_list); struct dentry *dentry_list = NULL; struct dentry *dentry; + int i; - if (!ef) + if (!ei) return; mutex_lock(&eventfs_mutex); - eventfs_remove_rec(ef, &ef_del_list, 0); - list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) { - if (ef->dentry) { - unsigned long ptr = (unsigned long)dentry_list; - - /* Keep the dentry from being freed yet */ - dget(ef->dentry); - - /* - * Paranoid: The dget() above should prevent the dentry - * from being freed and calling eventfs_set_ef_status_free(). - * But just in case, set the link list LSB pointer to 1 - * and have eventfs_set_ef_status_free() check that to - * make sure that if it does happen, it will not think - * the d_fsdata is an event_file. - * - * For this to work, no event_file should be allocated - * on a odd space, as the ef should always be allocated - * to be at least word aligned. Check for that too. 
- */ - WARN_ON_ONCE(ptr & 1); - - ef->dentry->d_fsdata = (void *)(ptr | 1); - dentry_list = ef->dentry; - ef->dentry = NULL; - } - call_srcu(&eventfs_srcu, &ef->rcu, free_ef); + eventfs_remove_rec(ei, &ei_del_list, 0); + + list_for_each_entry_safe(ei, tmp, &ei_del_list, del_list) { + for (i = 0; i < ei->nr_entries; i++) + unhook_dentry(&ei->d_children[i], &dentry_list); + unhook_dentry(&ei->dentry, &dentry_list); + call_srcu(&eventfs_srcu, &ei->rcu, free_ei); } mutex_unlock(&eventfs_mutex); @@ -876,8 +893,8 @@ void eventfs_remove(struct eventfs_file *ef) mutex_lock(&eventfs_mutex); /* dentry should now have at least a single reference */ WARN_ONCE((int)d_count(dentry) < 1, - "dentry %p less than one reference (%d) after invalidate\n", - dentry, d_count(dentry)); + "dentry %px (%s) less than one reference (%d) after invalidate\n", + dentry, dentry->d_name.name, d_count(dentry)); mutex_unlock(&eventfs_mutex); dput(dentry); } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 891653ba9cf3..34ffb2f8114e 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -385,7 +385,7 @@ static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode) ti = get_tracefs(inode); if (ti && ti->flags & TRACEFS_EVENT_INODE) - eventfs_set_ef_status_free(ti, dentry); + eventfs_set_ei_status_free(ti, dentry); iput(inode); } diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 4f2e49e2197b..298d3ecaf621 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -13,6 +13,41 @@ struct tracefs_inode { struct inode vfs_inode; }; +/* + * struct eventfs_inode - hold the properties of the eventfs directories. 
+ * @list: link list into the parent directory + * @entries: the array of entries representing the files in the directory + * @name: the name of the directory to create + * @children: link list into the child eventfs_inode + * @dentry: the dentry of the directory + * @d_parent: pointer to the parent's dentry + * @d_children: The array of dentries to represent the files when created + * @data: The private data to pass to the callbacks + * @nr_entries: The number of items in @entries + */ +struct eventfs_inode { + struct list_head list; + const struct eventfs_entry *entries; + const char *name; + struct list_head children; + struct dentry *dentry; + struct dentry *d_parent; + struct dentry **d_children; + void *data; + /* + * Union - used for deletion + * @del_list: list of eventfs_inode to delete + * @rcu: eventfs_indoe to delete in RCU + * @is_freed: node is freed if one of the above is set + */ + union { + struct list_head del_list; + struct rcu_head rcu; + unsigned long is_freed; + }; + int nr_entries; +}; + static inline struct tracefs_inode *get_tracefs(const struct inode *inode) { return container_of(inode, struct tracefs_inode, vfs_inode); @@ -25,6 +60,6 @@ struct inode *tracefs_get_inode(struct super_block *sb); struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); struct dentry *eventfs_failed_creating(struct dentry *dentry); struct dentry *eventfs_end_creating(struct dentry *dentry); -void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry); +void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21ae37e49319..12207dc6722d 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -649,7 +649,7 @@ struct trace_event_file { struct list_head list; struct trace_event_call *event_call; struct event_filter __rcu *filter; - struct eventfs_file *ef; + 
struct eventfs_inode *ei; struct trace_array *tr; struct trace_subsystem_dir *system; struct list_head triggers; diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 009072792fa3..0c39704455d9 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -23,26 +23,25 @@ struct file_operations; struct eventfs_file; -struct dentry *eventfs_create_events_dir(const char *name, - struct dentry *parent); +typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, + const struct file_operations **fops); -struct eventfs_file *eventfs_add_subsystem_dir(const char *name, - struct dentry *parent); +struct eventfs_entry { + const char *name; + eventfs_callback callback; +}; -struct eventfs_file *eventfs_add_dir(const char *name, - struct eventfs_file *ef_parent); +struct eventfs_inode; -int eventfs_add_file(const char *name, umode_t mode, - struct eventfs_file *ef_parent, void *data, - const struct file_operations *fops); +struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, + const struct eventfs_entry *entries, + int size, void *data); -int eventfs_add_events_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); +struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, + const struct eventfs_entry *entries, + int size, void *data); -void eventfs_remove(struct eventfs_file *ef); - -void eventfs_remove_events_dir(struct dentry *dentry); +void eventfs_remove_dir(struct eventfs_inode *ei); struct dentry *tracefs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dd6395692ff9..4383be8fa1b0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9764,7 +9764,6 @@ static __init void create_trace_instances(struct dentry *d_tracer) static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) { - 
struct trace_event_file *file; int cpu; trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, @@ -9797,11 +9796,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("trace_marker", 0220, d_tracer, tr, &tracing_mark_fops); - file = __find_event_file(tr, "ftrace", "print"); - if (file && file->ef) - eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef, - file, &event_trigger_fops); - tr->trace_marker_file = file; + tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); trace_create_file("trace_marker_raw", 0220, d_tracer, tr, &tracing_mark_raw_fops); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e92cb9c1292f..0e1405abf4f7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -381,7 +381,7 @@ struct trace_array { struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; - struct dentry *event_dir; + struct eventfs_inode *event_dir; struct trace_options *topts; struct list_head systems; struct list_head events; @@ -1349,7 +1349,7 @@ struct trace_subsystem_dir { struct list_head list; struct event_subsystem *subsystem; struct trace_array *tr; - struct eventfs_file *ef; + struct eventfs_inode *ei; int ref_count; int nr_events; }; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 099b9b6bbdc4..a3b9d9423824 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -984,7 +984,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) return; if (!--dir->nr_events) { - eventfs_remove(dir->ef); + eventfs_remove_dir(dir->ei); list_del(&dir->list); __put_system_dir(dir); } @@ -992,7 +992,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) static void remove_event_file_dir(struct trace_event_file *file) { - eventfs_remove(file->ef); + eventfs_remove_dir(file->ei); list_del(&file->list); remove_subsystem(file->system); free_event_filter(file->filter); @@ -2282,14 +2282,40 @@ create_new_subsystem(const char 
*name) return NULL; } -static struct eventfs_file * +int system_callback(const char *name, umode_t *mode, void **data, + const struct file_operations **fops) +{ + if (strcmp(name, "filter") == 0) + *fops = &ftrace_subsystem_filter_fops; + + else if (strcmp(name, "enable") == 0) + *fops = &ftrace_system_enable_fops; + + else + return 0; + + *mode = TRACE_MODE_WRITE; + return 1; +} + +static struct eventfs_inode * event_subsystem_dir(struct trace_array *tr, const char *name, - struct trace_event_file *file, struct dentry *parent) + struct trace_event_file *file, struct eventfs_inode *parent) { struct event_subsystem *system, *iter; struct trace_subsystem_dir *dir; - struct eventfs_file *ef; - int res; + struct eventfs_inode *ei; + int nr_entries; + static struct eventfs_entry system_entries[] = { + { + .name = "filter", + .callback = system_callback, + }, + { + .name = "enable", + .callback = system_callback, + } + }; /* First see if we did not already create this dir */ list_for_each_entry(dir, &tr->systems, list) { @@ -2297,7 +2323,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, if (strcmp(system->name, name) == 0) { dir->nr_events++; file->system = dir; - return dir->ef; + return dir->ei; } } @@ -2321,39 +2347,29 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } else __get_system(system); - ef = eventfs_add_subsystem_dir(name, parent); - if (IS_ERR(ef)) { + /* ftrace only has directories no files */ + if (strcmp(name, "ftrace") == 0) + nr_entries = 0; + else + nr_entries = ARRAY_SIZE(system_entries); + + ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir); + if (!ei) { pr_warn("Failed to create system directory %s\n", name); __put_system(system); goto out_free; } - dir->ef = ef; + dir->ei = ei; dir->tr = tr; dir->ref_count = 1; dir->nr_events = 1; dir->subsystem = system; file->system = dir; - /* the ftrace system is special, do not create enable or filter files */ - if (strcmp(name, "ftrace") != 0) { - - res 
= eventfs_add_file("filter", TRACE_MODE_WRITE, - dir->ef, dir, - &ftrace_subsystem_filter_fops); - if (res) { - kfree(system->filter); - system->filter = NULL; - pr_warn("Could not create tracefs '%s/filter' entry\n", name); - } - - eventfs_add_file("enable", TRACE_MODE_WRITE, dir->ef, dir, - &ftrace_system_enable_fops); - } - list_add(&dir->list, &tr->systems); - return dir->ef; + return dir->ei; out_free: kfree(dir); @@ -2402,15 +2418,134 @@ event_define_fields(struct trace_event_call *call) return ret; } +static int event_callback(const char *name, umode_t *mode, void **data, + const struct file_operations **fops) +{ + struct trace_event_file *file = *data; + struct trace_event_call *call = file->event_call; + + if (strcmp(name, "format") == 0) { + *mode = TRACE_MODE_READ; + *fops = &ftrace_event_format_fops; + *data = call; + return 1; + } + + /* + * Only event directories that can be enabled should have + * triggers or filters, with the exception of the "print" + * event that can have a "trigger" file. 
+ */ + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { + if (call->class->reg && strcmp(name, "enable") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &ftrace_enable_fops; + return 1; + } + + if (strcmp(name, "filter") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &ftrace_event_filter_fops; + return 1; + } + } + + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || + strcmp(trace_event_name(call), "print") == 0) { + if (strcmp(name, "trigger") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &event_trigger_fops; + return 1; + } + } + +#ifdef CONFIG_PERF_EVENTS + if (call->event.type && call->class->reg && + strcmp(name, "id") == 0) { + *mode = TRACE_MODE_READ; + *data = (void *)(long)call->event.type; + *fops = &ftrace_event_id_fops; + return 1; + } +#endif + +#ifdef CONFIG_HIST_TRIGGERS + if (strcmp(name, "hist") == 0) { + *mode = TRACE_MODE_READ; + *fops = &event_hist_fops; + return 1; + } +#endif +#ifdef CONFIG_HIST_TRIGGERS_DEBUG + if (strcmp(name, "hist_debug") == 0) { + *mode = TRACE_MODE_READ; + *fops = &event_hist_debug_fops; + return 1; + } +#endif +#ifdef CONFIG_TRACE_EVENT_INJECT + if (call->event.type && call->class->reg && + strcmp(name, "inject") == 0) { + *mode = 0200; + *fops = &event_inject_fops; + return 1; + } +#endif + return 0; +} + static int -event_create_dir(struct dentry *parent, struct trace_event_file *file) +event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) { struct trace_event_call *call = file->event_call; - struct eventfs_file *ef_subsystem = NULL; struct trace_array *tr = file->tr; - struct eventfs_file *ef; + struct eventfs_inode *e_events; + struct eventfs_inode *ei; const char *name; + int nr_entries; int ret; + static struct eventfs_entry event_entries[] = { + { + .name = "enable", + .callback = event_callback, + }, + { + .name = "filter", + .callback = event_callback, + }, + { + .name = "trigger", + .callback = event_callback, + }, + { + .name = "format", + .callback = event_callback, + }, +#ifdef 
CONFIG_PERF_EVENTS + { + .name = "id", + .callback = event_callback, + }, +#endif +#ifdef CONFIG_HIST_TRIGGERS + { + .name = "hist", + .callback = event_callback, + }, +#endif +#ifdef CONFIG_HIST_TRIGGERS_DEBUG + { + .name = "hist_debug", + .callback = event_callback, + }, +#endif +#ifdef CONFIG_TRACE_EVENT_INJECT + { + .name = "inject", + .callback = event_callback, + }, +#endif + }; /* * If the trace point header did not define TRACE_SYSTEM @@ -2420,29 +2555,20 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0)) return -ENODEV; - ef_subsystem = event_subsystem_dir(tr, call->class->system, file, parent); - if (!ef_subsystem) + e_events = event_subsystem_dir(tr, call->class->system, file, parent); + if (!e_events) return -ENOMEM; + nr_entries = ARRAY_SIZE(event_entries); + name = trace_event_name(call); - ef = eventfs_add_dir(name, ef_subsystem); - if (IS_ERR(ef)) { + ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file); + if (IS_ERR(ei)) { pr_warn("Could not create tracefs '%s' directory\n", name); return -1; } - file->ef = ef; - - if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) - eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file, - &ftrace_enable_fops); - -#ifdef CONFIG_PERF_EVENTS - if (call->event.type && call->class->reg) - eventfs_add_file("id", TRACE_MODE_READ, file->ef, - (void *)(long)call->event.type, - &ftrace_event_id_fops); -#endif + file->ei = ei; ret = event_define_fields(call); if (ret < 0) { @@ -2450,35 +2576,6 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) return ret; } - /* - * Only event directories that can be enabled should have - * triggers or filters. 
- */ - if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { - eventfs_add_file("filter", TRACE_MODE_WRITE, file->ef, - file, &ftrace_event_filter_fops); - - eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef, - file, &event_trigger_fops); - } - -#ifdef CONFIG_HIST_TRIGGERS - eventfs_add_file("hist", TRACE_MODE_READ, file->ef, file, - &event_hist_fops); -#endif -#ifdef CONFIG_HIST_TRIGGERS_DEBUG - eventfs_add_file("hist_debug", TRACE_MODE_READ, file->ef, file, - &event_hist_debug_fops); -#endif - eventfs_add_file("format", TRACE_MODE_READ, file->ef, call, - &ftrace_event_format_fops); - -#ifdef CONFIG_TRACE_EVENT_INJECT - if (call->event.type && call->class->reg) - eventfs_add_file("inject", 0200, file->ef, file, - &event_inject_fops); -#endif - return 0; } @@ -3623,30 +3720,65 @@ static __init int setup_trace_event(char *str) } __setup("trace_event=", setup_trace_event); +static int events_callback(const char *name, umode_t *mode, void **data, + const struct file_operations **fops) +{ + if (strcmp(name, "enable") == 0) { + *mode = TRACE_MODE_WRITE; + *fops = &ftrace_tr_enable_fops; + return 1; + } + + if (strcmp(name, "header_page") == 0) + *data = ring_buffer_print_page_header; + + else if (strcmp(name, "header_event") == 0) + *data = ring_buffer_print_entry_header; + + else + return 0; + + *mode = TRACE_MODE_READ; + *fops = &ftrace_show_header_fops; + return 1; +} + /* Expects to have event_mutex held when called */ static int create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) { - struct dentry *d_events; + struct eventfs_inode *e_events; struct dentry *entry; - int error = 0; + int nr_entries; + static struct eventfs_entry events_entries[] = { + { + .name = "enable", + .callback = events_callback, + }, + { + .name = "header_page", + .callback = events_callback, + }, + { + .name = "header_event", + .callback = events_callback, + }, + }; entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, tr, &ftrace_set_event_fops); if 
(!entry) return -ENOMEM; - d_events = eventfs_create_events_dir("events", parent); - if (IS_ERR(d_events)) { + nr_entries = ARRAY_SIZE(events_entries); + + e_events = eventfs_create_events_dir("events", parent, events_entries, + nr_entries, tr); + if (IS_ERR(e_events)) { pr_warn("Could not create tracefs 'events' directory\n"); return -ENOMEM; } - error = eventfs_add_events_file("enable", TRACE_MODE_WRITE, d_events, - tr, &ftrace_tr_enable_fops); - if (error) - return -ENOMEM; - /* There are not as crucial, just warn if they are not created */ trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent, @@ -3656,16 +3788,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) TRACE_MODE_WRITE, parent, tr, &ftrace_set_event_notrace_pid_fops); - /* ring buffer internal formats */ - eventfs_add_events_file("header_page", TRACE_MODE_READ, d_events, - ring_buffer_print_page_header, - &ftrace_show_header_fops); - - eventfs_add_events_file("header_event", TRACE_MODE_READ, d_events, - ring_buffer_print_entry_header, - &ftrace_show_header_fops); - - tr->event_dir = d_events; + tr->event_dir = e_events; return 0; } @@ -3749,7 +3872,7 @@ int event_trace_del_tracer(struct trace_array *tr) down_write(&trace_event_sem); __trace_remove_event_dirs(tr); - eventfs_remove_events_dir(tr->event_dir); + eventfs_remove_dir(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; -- cgit v1.2.3 From d7e9a9037de27b642d5a3edef7c69e2a2b460287 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 2 Oct 2023 16:09:35 -0700 Subject: f2fs: Support Block Size == Page Size This allows f2fs to support cases where the block size = page size for both 4K and 16K block sizes. Other sizes should work as well, should the need arise. This does not currently support 4K Block size filesystems if the page size is 16K. 
Signed-off-by: Daniel Rosenberg Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/super.c | 4 +-- include/linux/f2fs_fs.h | 69 +++++++++++++++++++++++++++++-------------------- 5 files changed, 46 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 96342aba8022..4e42b5f24deb 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -4076,7 +4076,7 @@ next: sis->highest_bit = cur_lblock - 1; out: if (not_aligned) - f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)", + f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)", not_aligned, blks_per_sec * F2FS_BLKSIZE); return ret; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cde243840abd..cafb81588359 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -315,7 +315,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) f2fs_has_inline_xattr(inode) && (!fi->i_inline_xattr_size || fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { - f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu", + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %lu", __func__, inode->i_ino, fi->i_inline_xattr_size, MAX_INLINE_XATTR_SIZE); return false; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ee2e1dd64f25..a2b2c6c7f66d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -633,7 +633,7 @@ static void f2fs_ra_node_pages(struct page *parent, int start, int n) /* Then, try readahead for siblings of the desired node */ end = start + n; - end = min(end, NIDS_PER_BLOCK); + end = min(end, (int)NIDS_PER_BLOCK); for (i = start; i < end; i++) { nid = get_nid(parent, i, false); f2fs_ra_node_page(sbi, nid); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bc303a052215..d2eafb56af81 100644 --- 
a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3502,7 +3502,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } - /* Currently, support 512/1024/2048/4096 bytes sector size */ + /* Currently, support 512/1024/2048/4096/16K bytes sector size */ if (le32_to_cpu(raw_super->log_sectorsize) > F2FS_MAX_LOG_SECTOR_SIZE || le32_to_cpu(raw_super->log_sectorsize) < @@ -4948,7 +4948,7 @@ static int __init init_f2fs_fs(void) int err; if (PAGE_SIZE != F2FS_BLKSIZE) { - printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n", + printk("F2FS not supported on PAGE_SIZE(%lu) != BLOCK_SIZE(%lu)\n", PAGE_SIZE, F2FS_BLKSIZE); return -EINVAL; } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a82a4bb6ce68..07ed69c2840d 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -13,10 +13,10 @@ #define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ #define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */ -#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */ -#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ -#define F2FS_BLKSIZE 4096 /* support only 4KB block */ -#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ +#define F2FS_MAX_LOG_SECTOR_SIZE PAGE_SHIFT /* Max is Block Size */ +#define F2FS_LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) /* log number for sector/blk */ +#define F2FS_BLKSIZE PAGE_SIZE /* support only block == page */ +#define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) @@ -210,14 +210,14 @@ struct f2fs_checkpoint { unsigned char sit_nat_version_bitmap[]; } __packed; -#define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */ +#define CP_CHKSUM_OFFSET (F2FS_BLKSIZE - sizeof(__le32)) /* default chksum offset in checkpoint */ #define CP_MIN_CHKSUM_OFFSET \ 
(offsetof(struct f2fs_checkpoint, sit_nat_version_bitmap)) /* * For orphan inode management */ -#define F2FS_ORPHANS_PER_BLOCK 1020 +#define F2FS_ORPHANS_PER_BLOCK ((F2FS_BLKSIZE - 4 * sizeof(__le32)) / sizeof(__le32)) #define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \ F2FS_ORPHANS_PER_BLOCK) @@ -243,14 +243,31 @@ struct f2fs_extent { #define F2FS_NAME_LEN 255 /* 200 bytes for inline xattrs by default */ #define DEFAULT_INLINE_XATTR_ADDRS 50 -#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ + +#define OFFSET_OF_END_OF_I_EXT 360 +#define SIZE_OF_I_NID 20 + +struct node_footer { + __le32 nid; /* node id */ + __le32 ino; /* inode number */ + __le32 flag; /* include cold/fsync/dentry marks and offset */ + __le64 cp_ver; /* checkpoint version */ + __le32 next_blkaddr; /* next node page block address */ +} __packed; + +/* Address Pointers in an Inode */ +#define DEF_ADDRS_PER_INODE ((F2FS_BLKSIZE - OFFSET_OF_END_OF_I_EXT \ + - SIZE_OF_I_NID \ + - sizeof(struct node_footer)) / sizeof(__le32)) #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ #define ADDRS_PER_INODE(inode) addrs_per_inode(inode) -#define DEF_ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +/* Address Pointers in a Direct Block */ +#define DEF_ADDRS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_BLOCK(inode) addrs_per_block(inode) -#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +/* Node IDs in an Indirect Block */ +#define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) #define ADDRS_PER_PAGE(page, inode) \ (IS_INODE(page) ? 
ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode)) @@ -342,14 +359,6 @@ enum { #define OFFSET_BIT_MASK GENMASK(OFFSET_BIT_SHIFT - 1, 0) -struct node_footer { - __le32 nid; /* node id */ - __le32 ino; /* inode number */ - __le32 flag; /* include cold/fsync/dentry marks and offset */ - __le64 cp_ver; /* checkpoint version */ - __le32 next_blkaddr; /* next node page block address */ -} __packed; - struct f2fs_node { /* can be one of three types: inode, direct, and indirect types */ union { @@ -363,7 +372,7 @@ struct f2fs_node { /* * For NAT entries */ -#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_nat_entry)) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ @@ -378,12 +387,13 @@ struct f2fs_nat_block { /* * For SIT entries * - * Each segment is 2MB in size by default so that a bitmap for validity of - * there-in blocks should occupy 64 bytes, 512 bits. + * A validity bitmap of 64 bytes covers 512 blocks of area. For a 4K page size, + * this results in a segment size of 2MB. For 16k pages, the default segment size + * is 8MB. * Not allow to change this. */ #define SIT_VBLOCK_MAP_SIZE 64 -#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) +#define SIT_ENTRY_PER_BLOCK (F2FS_BLKSIZE / sizeof(struct f2fs_sit_entry)) /* * F2FS uses 4 bytes to represent block address. As a result, supported size of @@ -418,7 +428,7 @@ struct f2fs_sit_block { * For segment summary * * One summary block contains exactly 512 summary entries, which represents - * exactly 2MB segment by default. Not allow to change the basic units. + * exactly one segment by default. Not allow to change the basic units. * * NOTE: For initializing fields, you must use set_summary * @@ -429,12 +439,12 @@ struct f2fs_sit_block { * from node's page's beginning to get a data block address. 
* ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node) */ -#define ENTRIES_IN_SUM 512 +#define ENTRIES_IN_SUM (F2FS_BLKSIZE / 8) #define SUMMARY_SIZE (7) /* sizeof(struct summary) */ #define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */ #define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM) -/* a summary entry for a 4KB-sized block in a segment */ +/* a summary entry for a block in a segment */ struct f2fs_summary { __le32 nid; /* parent node id */ union { @@ -518,7 +528,7 @@ struct f2fs_journal { }; } __packed; -/* 4KB-sized summary block structure */ +/* Block-sized summary block structure */ struct f2fs_summary_block { struct f2fs_summary entries[ENTRIES_IN_SUM]; struct f2fs_journal journal; @@ -559,11 +569,14 @@ typedef __le32 f2fs_hash_t; * Note: there are more reserved space in inline dentry than in regular * dentry, when converting inline dentry we should handle this carefully. */ -#define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */ + +/* the number of dentry in a block */ +#define NR_DENTRY_IN_BLOCK ((BITS_PER_BYTE * F2FS_BLKSIZE) / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * BITS_PER_BYTE + 1)) #define SIZE_OF_DIR_ENTRY 11 /* by byte */ #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) -#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \ +#define SIZE_OF_RESERVED (F2FS_BLKSIZE - ((SIZE_OF_DIR_ENTRY + \ F2FS_SLOT_LEN) * \ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP)) #define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' 
entries */ @@ -576,7 +589,7 @@ struct f2fs_dir_entry { __u8 file_type; /* file type */ } __packed; -/* 4KB-sized directory entry block */ +/* Block-sized directory entry block */ struct f2fs_dentry_block { /* validity bitmap for directory entries in each block */ __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP]; -- cgit v1.2.3 From 1cf56299f9bc7d4b8e1e39af08f01d6380e28173 Mon Sep 17 00:00:00 2001 From: Randy Li Date: Fri, 15 Sep 2023 01:23:23 +0800 Subject: USB: dma: remove unused function prototype usb_buffer_map_sg() and usb_buffer_unmap_sg() have no definition since the beginning of v5.4. The rest are gone from 2.6.12. Signed-off-by: Randy Li Link: https://lore.kernel.org/r/20230914172336.18761-2-ayaka@soulik.info Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index a21074861f91..8c61643acd49 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1823,22 +1823,6 @@ void *usb_alloc_coherent(struct usb_device *dev, size_t size, void usb_free_coherent(struct usb_device *dev, size_t size, void *addr, dma_addr_t dma); -#if 0 -struct urb *usb_buffer_map(struct urb *urb); -void usb_buffer_dmasync(struct urb *urb); -void usb_buffer_unmap(struct urb *urb); -#endif - -struct scatterlist; -int usb_buffer_map_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int nents); -#if 0 -void usb_buffer_dmasync_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); -#endif -void usb_buffer_unmap_sg(const struct usb_device *dev, int is_in, - struct scatterlist *sg, int n_hw_ents); - /*-------------------------------------------------------------------* * SYNCHRONOUS CALL SUPPORT * *-------------------------------------------------------------------*/ -- cgit v1.2.3 From 0f28ada1fbf0054557cddcdb93ad17f767105208 Mon Sep 17 00:00:00 2001 From: Jorge Sanjuan Garcia Date: Wed, 6 Sep 2023 
11:49:26 +0000 Subject: mcb: remove is_added flag from mcb_device struct When calling mcb_bus_add_devices(), both mcb devices and the mcb bus will attempt to attach a device to a driver because they share the same bus_type. This causes an issue when trying to cast the container of the device to mcb_device struct using to_mcb_device(), leading to a wrong cast when the mcb_bus is added. A crash occurs when freeing the ida resources as the bus numbering of mcb_bus gets confused with the is_added flag on the mcb_device struct. The only reason for this cast was to keep an is_added flag on the mcb_device struct that does not seem necessary. The function device_attach() handles already bound devices and the mcb subsystem does nothing special with this is_added flag so remove it completely. Fixes: 18d288198099 ("mcb: Correctly initialize the bus's device") Cc: stable Signed-off-by: Jorge Sanjuan Garcia Co-developed-by: Jose Javier Rodriguez Barbarin Signed-off-by: Jose Javier Rodriguez Barbarin Link: https://lore.kernel.org/r/20230906114901.63174-2-JoseJavier.Rodriguez@duagon.com Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-core.c | 10 +++------- drivers/mcb/mcb-parse.c | 2 -- include/linux/mcb.h | 1 - 3 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index 978fdfc19a06..0cac5bead84f 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -387,17 +387,13 @@ EXPORT_SYMBOL_NS_GPL(mcb_free_dev, MCB); static int __mcb_bus_add_devices(struct device *dev, void *data) { - struct mcb_device *mdev = to_mcb_device(dev); int retval; - if (mdev->is_added) - return 0; - retval = device_attach(dev); - if (retval < 0) + if (retval < 0) { dev_err(dev, "Error adding device (%d)\n", retval); - - mdev->is_added = true; + return retval; + } return 0; } diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c index 2aef990f379f..656b6b71c768 100644 --- a/drivers/mcb/mcb-parse.c
+++ b/drivers/mcb/mcb-parse.c @@ -99,8 +99,6 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, mdev->mem.end = mdev->mem.start + size - 1; mdev->mem.flags = IORESOURCE_MEM; - mdev->is_added = false; - ret = mcb_device_register(bus, mdev); if (ret < 0) goto err; diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 1e5893138afe..0b971b24a804 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -63,7 +63,6 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev) struct mcb_device { struct device dev; struct mcb_bus *bus; - bool is_added; struct mcb_driver *driver; u16 id; int inst; -- cgit v1.2.3 From e6814ec3ba1994561db9b1c05a80227d30cc18fa Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Fri, 21 Jul 2023 09:06:07 +0000 Subject: perf/core: Rename perf_proc_update_handler() -> perf_event_max_sample_rate_handler(), for readability Follow the naming pattern of the other sysctl handlers in perf. Signed-off-by: Xiu Jianfeng Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230721090607.172002-1-xiujianfeng@huawei.com --- include/linux/perf_event.h | 2 +- kernel/events/core.c | 4 ++-- kernel/sysctl.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e85cd1c0eaf3..f31f962a6445 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1573,7 +1573,7 @@ extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -int perf_proc_update_handler(struct ctl_table *table, int write, +int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/events/core.c b/kernel/events/core.c index 4c72a41f11af..af569196d760 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -449,8 
+449,8 @@ static void update_perf_cpu_limits(void) static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc); -int perf_proc_update_handler(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int ret; int perf_cpu = sysctl_perf_cpu_time_max_percent; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 354a2d294f52..2b6585751891 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1983,7 +1983,7 @@ static struct ctl_table kern_table[] = { .data = &sysctl_perf_event_sample_rate, .maxlen = sizeof(sysctl_perf_event_sample_rate), .mode = 0644, - .proc_handler = perf_proc_update_handler, + .proc_handler = perf_event_max_sample_rate_handler, .extra1 = SYSCTL_ONE, }, { -- cgit v1.2.3 From 0cff993e08a7578e2c1df93a95fc5059f447e7ae Mon Sep 17 00:00:00 2001 From: "pangzizhen001@208suo.com" Date: Thu, 20 Jul 2023 23:45:39 +0800 Subject: locking/seqlock: Fix typo in comment s/the the /the [ mingo: Cleaned up the changelog. ] Signed-off-by: Zizhen Pang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/70293ecd5bb7a1cd370fd4d95c35f936@208suo.com --- include/linux/seqlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 987a59d977c5..ea7a58258af6 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -864,7 +864,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) } /* - * For all seqlock_t write side functions, use the the internal + * For all seqlock_t write side functions, use the internal * do_write_seqcount_begin() instead of generic write_seqcount_begin(). * This way, no redundant lockdep_assert_held() checks are added. 
*/ -- cgit v1.2.3 From b83ce9cb4a465b8f9a3fa45561b721a9551f60e3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 8 Sep 2023 10:27:23 +0200 Subject: dma-buf: add dma_fence_timestamp helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a fence signals there is a very small race window where the timestamp isn't updated yet. sync_file solves this by busy waiting for the timestamp to appear, but on other occasions this wasn't handled correctly. Provide a dma_fence_timestamp() helper function for this and use it in all appropriate cases. Another alternative would be to grab the spinlock when that happens. v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case the accurate timestamp is needed and/or the timestamp is not based on ktime (e.g. hw timestamp) v3 chk: drop the parameter again for unified handling Signed-off-by: Yunxiang Li Signed-off-by: Christian König Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2") Reviewed-by: Alex Deucher CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-unwrap.c | 13 ++++--------- drivers/dma-buf/sync_file.c | 9 +++------ drivers/gpu/drm/scheduler/sched_main.c | 2 +- include/linux/dma-fence.h | 19 +++++++++++++++++++ 4 files changed, 27 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index c625bb2b5d56..628af51c81af 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -76,16 +76,11 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) { if (!dma_fence_is_signaled(tmp)) { ++count; - } else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, - &tmp->flags)) { - if (ktime_after(tmp->timestamp, timestamp)) - timestamp = tmp->timestamp; } else
{ - /* - * Use the current time if the fence is - * currently signaling. - */ - timestamp = ktime_get(); + ktime_t t = dma_fence_timestamp(tmp); + + if (ktime_after(t, timestamp)) + timestamp = t; } } } diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index af57799c86ce..2e9a316c596a 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -268,13 +268,10 @@ static int sync_fill_fence_info(struct dma_fence *fence, sizeof(info->driver_name)); info->status = dma_fence_get_status(fence); - while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && - !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) - cpu_relax(); info->timestamp_ns = - test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ? - ktime_to_ns(fence->timestamp) : - ktime_set(0, 0); + dma_fence_is_signaled(fence) ? + ktime_to_ns(dma_fence_timestamp(fence)) : + ktime_set(0, 0); return info->status; } diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 506371c42745..5a3a622fc672 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -929,7 +929,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) if (next) { next->s_fence->scheduled.timestamp = - job->s_fence->finished.timestamp; + dma_fence_timestamp(&job->s_fence->finished); /* start TO timer for next job */ drm_sched_start_timeout(sched); } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 0d678e9a7b24..ebe78bd3d121 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence, fence->error = error; } +/** + * dma_fence_timestamp - helper to get the completion timestamp of a fence + * @fence: fence to get the timestamp from. + * + * After a fence is signaled the timestamp is updated with the signaling time, + * but setting the timestamp can race with tasks waiting for the signaling. 
This + * helper busy waits for the correct timestamp to appear. + */ +static inline ktime_t dma_fence_timestamp(struct dma_fence *fence) +{ + if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) + return ktime_get(); + + while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) + cpu_relax(); + + return fence->timestamp; +} + signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, -- cgit v1.2.3 From a083c755e136844a934bc9b4416cd23b5c19c617 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 8 Sep 2023 22:58:40 +0900 Subject: devres: rename the first parameter of devm_add_action(_or_reset) The first parameter of devm_add_action(_or_reset) is a device. The name 'release' is confusing because it is often used for dr_release_t in the devres context. Rename it to 'dev'. No functional change intended. Signed-off-by: Masahiro Yamada Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230908135840.2362708-1-masahiroy@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 56d93a1ffb7b..d7a72a8749ea 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -389,8 +389,8 @@ void devm_remove_action(struct device *dev, void (*action)(void *), void *data); void devm_release_action(struct device *dev, void (*action)(void *), void *data); int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); -#define devm_add_action(release, action, data) \ - __devm_add_action(release, action, data, #action) +#define devm_add_action(dev, action, data) \ + __devm_add_action(dev, action, data, #action) static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), void *data, const char *name) @@ -403,8 +403,8 @@ 
static inline int __devm_add_action_or_reset(struct device *dev, void (*action)( return ret; } -#define devm_add_action_or_reset(release, action, data) \ - __devm_add_action_or_reset(release, action, data, #action) +#define devm_add_action_or_reset(dev, action, data) \ + __devm_add_action_or_reset(dev, action, data, #action) /** * devm_alloc_percpu - Resource-managed alloc_percpu -- cgit v1.2.3 From 5831fc1fd4a578232fea708b82de0c666ed17153 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Thu, 28 Sep 2023 16:57:22 +0800 Subject: crypto: hisilicon/qm - fix PF queue parameter issue If the queue isolation feature is enabled, the number of queues supported by the device changes. When PF is enabled using the current default number of queues, the default number of queues may be greater than the number supported by the device. As a result, the PF fails to be bound to the driver. After modification, if queue isolation feature is enabled, when the default queue parameter is greater than the number supported by the device, the number of enabled queues will be changed to the number supported by the device, so that the PF and driver can be properly bound. 
Fixes: 8bbecfb402f7 ("crypto: hisilicon/qm - add queue isolation support for Kunpeng930") Signed-off-by: Longfang Liu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 5 +++++ drivers/crypto/hisilicon/qm.c | 18 ++++++++++++------ drivers/crypto/hisilicon/qm_common.h | 1 - drivers/crypto/hisilicon/sec2/sec_main.c | 5 +++++ drivers/crypto/hisilicon/zip/zip_main.c | 5 +++++ include/linux/hisi_acc_qm.h | 7 +++++++ 6 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index db44d889438a..3dce35debf63 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -433,8 +433,11 @@ static u32 uacce_mode = UACCE_MODE_NOUACCE; module_param_cb(uacce_mode, &hpre_uacce_mode_ops, &uacce_mode, 0444); MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC); +static bool pf_q_num_flag; static int pf_q_num_set(const char *val, const struct kernel_param *kp) { + pf_q_num_flag = true; + return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_HPRE_PF); } @@ -1157,6 +1160,8 @@ static int hpre_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; qm->qm_list = &hpre_devices; + if (pf_q_num_flag) + set_bit(QM_MODULE_PARAM, &qm->misc_ctl); } ret = hisi_qm_init(qm); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index a99fd589445c..1638c0a7df31 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -206,8 +206,6 @@ #define WAIT_PERIOD 20 #define REMOVE_WAIT_DELAY 10 -#define QM_DRIVER_REMOVING 0 -#define QM_RST_SCHED 1 #define QM_QOS_PARAM_NUM 2 #define QM_QOS_MAX_VAL 1000 #define QM_QOS_RATE 100 @@ -2824,7 +2822,6 @@ static void hisi_qm_pre_init(struct hisi_qm *qm) mutex_init(&qm->mailbox_lock); init_rwsem(&qm->qps_lock); qm->qp_in_used = 0; - qm->misc_ctl = false; if (test_bit(QM_SUPPORT_RPM, &qm->caps)) { if 
(!acpi_device_power_manageable(ACPI_COMPANION(&pdev->dev))) dev_info(&pdev->dev, "_PS0 and _PR0 are not defined"); @@ -5093,6 +5090,7 @@ free_eq_irq: static int qm_get_qp_num(struct hisi_qm *qm) { + struct device *dev = &qm->pdev->dev; bool is_db_isolation; /* VF's qp_num assigned by PF in v2, and VF can get qp_num by vft. */ @@ -5109,13 +5107,21 @@ static int qm_get_qp_num(struct hisi_qm *qm) qm->max_qp_num = hisi_qm_get_hw_info(qm, qm_basic_info, QM_FUNC_MAX_QP_CAP, is_db_isolation); - /* check if qp number is valid */ - if (qm->qp_num > qm->max_qp_num) { - dev_err(&qm->pdev->dev, "qp num(%u) is more than max qp num(%u)!\n", + if (qm->qp_num <= qm->max_qp_num) + return 0; + + if (test_bit(QM_MODULE_PARAM, &qm->misc_ctl)) { + /* Check whether the set qp number is valid */ + dev_err(dev, "qp num(%u) is more than max qp num(%u)!\n", qm->qp_num, qm->max_qp_num); return -EINVAL; } + dev_info(dev, "Default qp num(%u) is too big, reset it to Function's max qp num(%u)!\n", + qm->qp_num, qm->max_qp_num); + qm->qp_num = qm->max_qp_num; + qm->debug.curr_qm_qp_num = qm->qp_num; + return 0; } diff --git a/drivers/crypto/hisilicon/qm_common.h b/drivers/crypto/hisilicon/qm_common.h index 1406a422d455..8e36aa9c681b 100644 --- a/drivers/crypto/hisilicon/qm_common.h +++ b/drivers/crypto/hisilicon/qm_common.h @@ -4,7 +4,6 @@ #define QM_COMMON_H #define QM_DBG_READ_LEN 256 -#define QM_RESETTING 2 struct qm_cqe { __le32 rsvd0; diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 77f9f131b850..62bd8936a915 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -311,8 +311,11 @@ static int sec_diff_regs_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(sec_diff_regs); +static bool pf_q_num_flag; static int sec_pf_q_num_set(const char *val, const struct kernel_param *kp) { + pf_q_num_flag = true; + return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_SEC_PF); } @@ -1120,6 +1123,8 @@ 
static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; qm->qm_list = &sec_devices; + if (pf_q_num_flag) + set_bit(QM_MODULE_PARAM, &qm->misc_ctl); } else if (qm->fun_type == QM_HW_VF && qm->ver == QM_HW_V1) { /* * have no way to get qm configure in VM in v1 hardware, diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 0d5d1ee363e4..945ab3648a87 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -364,8 +364,11 @@ static u32 uacce_mode = UACCE_MODE_NOUACCE; module_param_cb(uacce_mode, &zip_uacce_mode_ops, &uacce_mode, 0444); MODULE_PARM_DESC(uacce_mode, UACCE_MODE_DESC); +static bool pf_q_num_flag; static int pf_q_num_set(const char *val, const struct kernel_param *kp) { + pf_q_num_flag = true; + return q_num_set(val, kp, PCI_DEVICE_ID_HUAWEI_ZIP_PF); } @@ -1139,6 +1142,8 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; qm->qm_list = &zip_devices; + if (pf_q_num_flag) + set_bit(QM_MODULE_PARAM, &qm->misc_ctl); } else if (qm->fun_type == QM_HW_VF && qm->ver == QM_HW_V1) { /* * have no way to get qm configure in VM in v1 hardware, diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 39fbfb4be944..9da4f3f1e6d6 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -144,6 +144,13 @@ enum qm_vf_state { QM_NOT_READY, }; +enum qm_misc_ctl_bits { + QM_DRIVER_REMOVING = 0x0, + QM_RST_SCHED, + QM_RESETTING, + QM_MODULE_PARAM, +}; + enum qm_cap_bits { QM_SUPPORT_DB_ISOLATION = 0x0, QM_SUPPORT_FUNC_QOS, -- cgit v1.2.3 From b42ab1c61a77832040ad42ebf9adf237360e49f7 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Thu, 28 Sep 2023 17:21:47 +0800 Subject: crypto: hisilicon/qm - check function qp num before alg register When the Kunpeng accelerator executes tasks such as encryption and 
decryption have minimum requirements on the number of device queues. If the number of queues does not meet the requirement, the process initialization will fail. Therefore, the driver checks the number of queues on the device before registering the algorithm. If the number does not meet the requirements, the driver does not register the algorithm to crypto subsystem, the device is still added to the qm_list. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 25 +++++++++++++-- drivers/crypto/hisilicon/hpre/hpre_main.c | 14 ++++++--- drivers/crypto/hisilicon/qm.c | 47 ++++++++++------------------- drivers/crypto/hisilicon/sec2/sec_crypto.c | 31 +++++++++++++++++-- drivers/crypto/hisilicon/sec2/sec_main.c | 24 ++++++--------- drivers/crypto/hisilicon/zip/zip_crypto.c | 24 ++++++++++++++- drivers/crypto/hisilicon/zip/zip_main.c | 14 ++++++--- include/linux/hisi_acc_qm.h | 18 +++++++++-- 8 files changed, 134 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 9a1c61be32cc..764532a6ca82 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -57,6 +57,9 @@ struct hpre_ctx; #define HPRE_DRV_ECDH_MASK_CAP BIT(2) #define HPRE_DRV_X25519_MASK_CAP BIT(5) +static DEFINE_MUTEX(hpre_algs_lock); +static unsigned int hpre_available_devs; + typedef void (*hpre_cb)(struct hpre_ctx *ctx, void *sqe); struct hpre_rsa_ctx { @@ -2202,11 +2205,17 @@ static void hpre_unregister_x25519(struct hisi_qm *qm) int hpre_algs_register(struct hisi_qm *qm) { - int ret; + int ret = 0; + + mutex_lock(&hpre_algs_lock); + if (hpre_available_devs) { + hpre_available_devs++; + goto unlock; + } ret = hpre_register_rsa(qm); if (ret) - return ret; + goto unlock; ret = hpre_register_dh(qm); if (ret) @@ -2220,6 +2229,9 @@ int hpre_algs_register(struct hisi_qm *qm) if (ret) goto unreg_ecdh; + 
hpre_available_devs++; + mutex_unlock(&hpre_algs_lock); + return ret; unreg_ecdh: @@ -2228,13 +2240,22 @@ unreg_dh: hpre_unregister_dh(qm); unreg_rsa: hpre_unregister_rsa(qm); +unlock: + mutex_unlock(&hpre_algs_lock); return ret; } void hpre_algs_unregister(struct hisi_qm *qm) { + mutex_lock(&hpre_algs_lock); + if (--hpre_available_devs) + goto unlock; + hpre_unregister_x25519(qm); hpre_unregister_ecdh(qm); hpre_unregister_dh(qm); hpre_unregister_rsa(qm); + +unlock: + mutex_unlock(&hpre_algs_lock); } diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 3dce35debf63..56777099ef69 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -107,6 +107,7 @@ #define HPRE_VIA_MSI_DSM 1 #define HPRE_SQE_MASK_OFFSET 8 #define HPRE_SQE_MASK_LEN 24 +#define HPRE_CTX_Q_NUM_DEF 1 #define HPRE_DFX_BASE 0x301000 #define HPRE_DFX_COMMON1 0x301400 @@ -1399,10 +1400,11 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) dev_warn(&pdev->dev, "init debugfs fail!\n"); - ret = hisi_qm_alg_register(qm, &hpre_devices); + hisi_qm_add_list(qm, &hpre_devices); + ret = hisi_qm_alg_register(qm, &hpre_devices, HPRE_CTX_Q_NUM_DEF); if (ret < 0) { pci_err(pdev, "fail to register algs to crypto!\n"); - goto err_with_qm_start; + goto err_qm_del_list; } if (qm->uacce) { @@ -1424,9 +1426,10 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_with_alg_register: - hisi_qm_alg_unregister(qm, &hpre_devices); + hisi_qm_alg_unregister(qm, &hpre_devices, HPRE_CTX_Q_NUM_DEF); -err_with_qm_start: +err_qm_del_list: + hisi_qm_del_list(qm, &hpre_devices); hpre_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); @@ -1446,7 +1449,8 @@ static void hpre_remove(struct pci_dev *pdev) hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &hpre_devices); - hisi_qm_alg_unregister(qm, &hpre_devices); + hisi_qm_alg_unregister(qm, &hpre_devices, 
HPRE_CTX_Q_NUM_DEF); + hisi_qm_del_list(qm, &hpre_devices); if (qm->fun_type == QM_HW_PF && qm->vfs_num) hisi_qm_sriov_disable(pdev, true); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 458d1fe42a24..f3b55c044dd3 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -4860,63 +4860,48 @@ static void qm_cmd_process(struct work_struct *cmd_process) } /** - * hisi_qm_alg_register() - Register alg to crypto and add qm to qm_list. + * hisi_qm_alg_register() - Register alg to crypto. * @qm: The qm needs add. * @qm_list: The qm list. + * @guard: Guard of qp_num. * - * This function adds qm to qm list, and will register algorithm to - * crypto when the qm list is empty. + * Register algorithm to crypto when the function is satisfy guard. */ -int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard) { struct device *dev = &qm->pdev->dev; - int flag = 0; - int ret = 0; - - mutex_lock(&qm_list->lock); - if (list_empty(&qm_list->list)) - flag = 1; - list_add_tail(&qm->list, &qm_list->list); - mutex_unlock(&qm_list->lock); if (qm->ver <= QM_HW_V2 && qm->use_sva) { dev_info(dev, "HW V2 not both use uacce sva mode and hardware crypto algs.\n"); return 0; } - if (flag) { - ret = qm_list->register_to_crypto(qm); - if (ret) { - mutex_lock(&qm_list->lock); - list_del(&qm->list); - mutex_unlock(&qm_list->lock); - } + if (qm->qp_num < guard) { + dev_info(dev, "qp_num is less than task need.\n"); + return 0; } - return ret; + return qm_list->register_to_crypto(qm); } EXPORT_SYMBOL_GPL(hisi_qm_alg_register); /** - * hisi_qm_alg_unregister() - Unregister alg from crypto and delete qm from - * qm list. + * hisi_qm_alg_unregister() - Unregister alg from crypto. * @qm: The qm needs delete. * @qm_list: The qm list. + * @guard: Guard of qp_num. 
* - * This function deletes qm from qm list, and will unregister algorithm - * from crypto when the qm list is empty. + * Unregister algorithm from crypto when the last function is satisfy guard. */ -void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard) { - mutex_lock(&qm_list->lock); - list_del(&qm->list); - mutex_unlock(&qm_list->lock); - if (qm->ver <= QM_HW_V2 && qm->use_sva) return; - if (list_empty(&qm_list->list)) - qm_list->unregister_from_crypto(qm); + if (qm->qp_num < guard) + return; + + qm_list->unregister_from_crypto(qm); } EXPORT_SYMBOL_GPL(hisi_qm_alg_unregister); diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index ed77711e809e..6fcabbc87860 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -104,6 +104,9 @@ #define IV_CTR_INIT 0x1 #define IV_BYTE_OFFSET 0x8 +static DEFINE_MUTEX(sec_algs_lock); +static unsigned int sec_available_devs; + struct sec_skcipher { u64 alg_msk; struct skcipher_alg alg; @@ -2545,16 +2548,31 @@ err: int sec_register_to_crypto(struct hisi_qm *qm) { u64 alg_mask = sec_get_alg_bitmap(qm, SEC_DRV_ALG_BITMAP_HIGH, SEC_DRV_ALG_BITMAP_LOW); - int ret; + int ret = 0; + + mutex_lock(&sec_algs_lock); + if (sec_available_devs) { + sec_available_devs++; + goto unlock; + } ret = sec_register_skcipher(alg_mask); if (ret) - return ret; + goto unlock; ret = sec_register_aead(alg_mask); if (ret) - sec_unregister_skcipher(alg_mask, ARRAY_SIZE(sec_skciphers)); + goto unreg_skcipher; + sec_available_devs++; + mutex_unlock(&sec_algs_lock); + + return 0; + +unreg_skcipher: + sec_unregister_skcipher(alg_mask, ARRAY_SIZE(sec_skciphers)); +unlock: + mutex_unlock(&sec_algs_lock); return ret; } @@ -2562,6 +2580,13 @@ void sec_unregister_from_crypto(struct hisi_qm *qm) { u64 alg_mask = sec_get_alg_bitmap(qm, 
SEC_DRV_ALG_BITMAP_HIGH, SEC_DRV_ALG_BITMAP_LOW); + mutex_lock(&sec_algs_lock); + if (--sec_available_devs) + goto unlock; + sec_unregister_aead(alg_mask, ARRAY_SIZE(sec_aeads)); sec_unregister_skcipher(alg_mask, ARRAY_SIZE(sec_skciphers)); + +unlock: + mutex_unlock(&sec_algs_lock); } diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 62bd8936a915..0e56a47eb862 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -1234,15 +1234,11 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) pci_warn(pdev, "Failed to init debugfs!\n"); - if (qm->qp_num >= ctx_q_num) { - ret = hisi_qm_alg_register(qm, &sec_devices); - if (ret < 0) { - pr_err("Failed to register driver to crypto.\n"); - goto err_qm_stop; - } - } else { - pci_warn(qm->pdev, - "Failed to use kernel mode, qp not enough!\n"); + hisi_qm_add_list(qm, &sec_devices); + ret = hisi_qm_alg_register(qm, &sec_devices, ctx_q_num); + if (ret < 0) { + pr_err("Failed to register driver to crypto.\n"); + goto err_qm_del_list; } if (qm->uacce) { @@ -1264,9 +1260,9 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_alg_unregister: - if (qm->qp_num >= ctx_q_num) - hisi_qm_alg_unregister(qm, &sec_devices); -err_qm_stop: + hisi_qm_alg_unregister(qm, &sec_devices, ctx_q_num); +err_qm_del_list: + hisi_qm_del_list(qm, &sec_devices); sec_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); err_probe_uninit: @@ -1283,8 +1279,8 @@ static void sec_remove(struct pci_dev *pdev) hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &sec_devices); - if (qm->qp_num >= ctx_q_num) - hisi_qm_alg_unregister(qm, &sec_devices); + hisi_qm_alg_unregister(qm, &sec_devices, ctx_q_num); + hisi_qm_del_list(qm, &sec_devices); if (qm->fun_type == QM_HW_PF && qm->vfs_num) hisi_qm_sriov_disable(pdev, true); diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c 
b/drivers/crypto/hisilicon/zip/zip_crypto.c index 636ac794ebb7..c650c741a18d 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -25,6 +25,9 @@ #define HZIP_ALG_DEFLATE GENMASK(5, 4) +static DEFINE_MUTEX(zip_algs_lock); +static unsigned int zip_available_devs; + enum hisi_zip_alg_type { HZIP_ALG_TYPE_COMP = 0, HZIP_ALG_TYPE_DECOMP = 1, @@ -618,10 +621,29 @@ static void hisi_zip_unregister_deflate(struct hisi_qm *qm) int hisi_zip_register_to_crypto(struct hisi_qm *qm) { - return hisi_zip_register_deflate(qm); + int ret = 0; + + mutex_lock(&zip_algs_lock); + if (zip_available_devs++) + goto unlock; + + ret = hisi_zip_register_deflate(qm); + if (ret) + zip_available_devs--; + +unlock: + mutex_unlock(&zip_algs_lock); + return ret; } void hisi_zip_unregister_from_crypto(struct hisi_qm *qm) { + mutex_lock(&zip_algs_lock); + if (--zip_available_devs) + goto unlock; + hisi_zip_unregister_deflate(qm); + +unlock: + mutex_unlock(&zip_algs_lock); } diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 945ab3648a87..db4c964cd649 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -66,6 +66,7 @@ #define HZIP_SQE_SIZE 128 #define HZIP_PF_DEF_Q_NUM 64 #define HZIP_PF_DEF_Q_BASE 0 +#define HZIP_CTX_Q_NUM_DEF 2 #define HZIP_SOFT_CTRL_CNT_CLR_CE 0x301000 #define HZIP_SOFT_CTRL_CNT_CLR_CE_BIT BIT(0) @@ -1231,10 +1232,11 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) pci_err(pdev, "failed to init debugfs (%d)!\n", ret); - ret = hisi_qm_alg_register(qm, &zip_devices); + hisi_qm_add_list(qm, &zip_devices); + ret = hisi_qm_alg_register(qm, &zip_devices, HZIP_CTX_Q_NUM_DEF); if (ret < 0) { pci_err(pdev, "failed to register driver to crypto!\n"); - goto err_qm_stop; + goto err_qm_del_list; } if (qm->uacce) { @@ -1256,9 +1258,10 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct 
pci_device_id *id) return 0; err_qm_alg_unregister: - hisi_qm_alg_unregister(qm, &zip_devices); + hisi_qm_alg_unregister(qm, &zip_devices, HZIP_CTX_Q_NUM_DEF); -err_qm_stop: +err_qm_del_list: + hisi_qm_del_list(qm, &zip_devices); hisi_zip_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); @@ -1278,7 +1281,8 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &zip_devices); - hisi_qm_alg_unregister(qm, &zip_devices); + hisi_qm_alg_unregister(qm, &zip_devices, HZIP_CTX_Q_NUM_DEF); + hisi_qm_del_list(qm, &zip_devices); if (qm->fun_type == QM_HW_PF && qm->vfs_num) hisi_qm_sriov_disable(pdev, true); diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 9da4f3f1e6d6..34c64a02712c 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -478,6 +478,20 @@ static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) mutex_init(&qm_list->lock); } +static inline void hisi_qm_add_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_add_tail(&qm->list, &qm_list->list); + mutex_unlock(&qm_list->lock); +} + +static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_del(&qm->list); + mutex_unlock(&qm_list->lock); +} + int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); @@ -523,8 +537,8 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); void hisi_qm_dev_shutdown(struct pci_dev *pdev); void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list); -void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list); +int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); +void 
hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard); int hisi_qm_resume(struct device *dev); int hisi_qm_suspend(struct device *dev); void hisi_qm_pm_uninit(struct hisi_qm *qm); -- cgit v1.2.3 From 8468516f9f93a41dc65158b6428a1a1039c68f20 Mon Sep 17 00:00:00 2001 From: Dimitri John Ledkov Date: Mon, 2 Oct 2023 00:57:15 +0100 Subject: crypto: pkcs7 - remove md4 md5 x.509 support Remove support for md4 md5 hash and signatures in x.509 certificate parsers, pkcs7 signature parser, authenticode parser. All of these are insecure or broken, and everyone migrated to alternative hash implementations a long time ago. Also remove md2 & md3 oids, which already had no support. This is also likely the last user of md4 in the kernel, and thus crypto/md4.c and related tests in tcrypt & testmgr can likely be removed. Other users such as cifs smbfs ext modpost sumversions have their own internal implementation as needed. Signed-off-by: Dimitri John Ledkov Reviewed-by: Jarkko Sakkinen Signed-off-by: Herbert Xu --- crypto/asymmetric_keys/mscode_parser.c | 6 ------ crypto/asymmetric_keys/pkcs7_parser.c | 6 ------ crypto/asymmetric_keys/x509_cert_parser.c | 6 ------ include/linux/oid_registry.h | 8 -------- 4 files changed, 26 deletions(-) (limited to 'include/linux') diff --git a/crypto/asymmetric_keys/mscode_parser.c b/crypto/asymmetric_keys/mscode_parser.c index 839591ad21ac..690405ebe77b 100644 --- a/crypto/asymmetric_keys/mscode_parser.c +++ b/crypto/asymmetric_keys/mscode_parser.c @@ -75,12 +75,6 @@ int mscode_note_digest_algo(void *context, size_t hdrlen, oid = look_up_OID(value, vlen); switch (oid) { - case OID_md4: - ctx->digest_algo = "md4"; - break; - case OID_md5: - ctx->digest_algo = "md5"; - break; case OID_sha1: ctx->digest_algo = "sha1"; break; diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index 277482bb1777..cf4caab9620f 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++
b/crypto/asymmetric_keys/pkcs7_parser.c @@ -227,12 +227,6 @@ int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen, struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { - case OID_md4: - ctx->sinfo->sig->hash_algo = "md4"; - break; - case OID_md5: - ctx->sinfo->sig->hash_algo = "md5"; - break; case OID_sha1: ctx->sinfo->sig->hash_algo = "sha1"; break; diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 0a7049b470c1..2c30928621b7 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -195,15 +195,9 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, pr_debug("PubKey Algo: %u\n", ctx->last_oid); switch (ctx->last_oid) { - case OID_md2WithRSAEncryption: - case OID_md3WithRSAEncryption: default: return -ENOPKG; /* Unsupported combination */ - case OID_md4WithRSAEncryption: - ctx->cert->sig->hash_algo = "md4"; - goto rsa_pkcs1; - case OID_sha1WithRSAEncryption: ctx->cert->sig->hash_algo = "sha1"; goto rsa_pkcs1; diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index f86a08ba0207..4d04fa5d1eec 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -30,9 +30,6 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ - OID_md2WithRSAEncryption, /* 1.2.840.113549.1.1.2 */ - OID_md3WithRSAEncryption, /* 1.2.840.113549.1.1.3 */ - OID_md4WithRSAEncryption, /* 1.2.840.113549.1.1.4 */ OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ @@ -49,11 +46,6 @@ enum OID { OID_smimeCapabilites, /* 1.2.840.113549.1.9.15 */ OID_smimeAuthenticatedAttrs, /* 1.2.840.113549.1.9.16.2.11 */ - /* {iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2)} */ - OID_md2, /* 1.2.840.113549.2.2 */ - 
OID_md4, /* 1.2.840.113549.2.4 */ - OID_md5, /* 1.2.840.113549.2.5 */ - OID_mskrb5, /* 1.2.840.48018.1.2.2 */ OID_krb5, /* 1.2.840.113554.1.2.2 */ OID_krb5u2u, /* 1.2.840.113554.1.2.2.3 */ -- cgit v1.2.3 From 7523d330aac7190f738998a52df8d5aa14293280 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 4 Sep 2023 13:40:46 +0300 Subject: device property: Clarify usage scope of some struct fwnode_handle members Most of the struct fwnode_handle members are for exclusive use with device links framework. Clarify this by adding a respective comment. Signed-off-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20230904104046.1682875-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 5700451b300f..2a72f55d26eb 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -41,6 +41,8 @@ struct device; struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; + + /* The below is used solely by device links, don't use otherwise */ struct device *dev; struct list_head suppliers; struct list_head consumers; -- cgit v1.2.3 From 1dc05a274a7b13fd61b6c43f0136153752e6f731 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Sep 2023 18:38:19 +0300 Subject: device property: Replace custom implementation of COUNT_ARGS() Replace custom and non-portable implementation of COUNT_ARGS(). 
Fixes: e64b674bc9d7 ("software node: implement reference properties") Reported-by: Nick Desaulniers Closes: https://lore.kernel.org/r/ZQoILN6QCjzosCOs@google.com Signed-off-by: Andy Shevchenko Reviewed-by: Takashi Iwai Closes: https://github.com/ClangBuiltLinux/linux/issues/1935 Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20230920153819.2069869-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 8c3c6685a2ae..9f2585d705a8 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -10,6 +10,7 @@ #ifndef _LINUX_PROPERTY_H_ #define _LINUX_PROPERTY_H_ +#include #include #include #include @@ -288,7 +289,7 @@ struct software_node_ref_args { #define SOFTWARE_NODE_REFERENCE(_ref_, ...) \ (const struct software_node_ref_args) { \ .node = _ref_, \ - .nargs = ARRAY_SIZE(((u64[]){ 0, ##__VA_ARGS__ })) - 1, \ + .nargs = COUNT_ARGS(__VA_ARGS__), \ .args = { __VA_ARGS__ }, \ } -- cgit v1.2.3 From a56cc0a8338523f709892696cc229527617c1316 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Oct 2023 15:17:24 +0200 Subject: thermal: core: Add function to walk trips under zone lock Add a wrapper around for_each_thermal_trip(), called thermal_zone_for_each_trip(), that will invoke the former under the thermal zone lock and pass its return value to the caller. Two drivers will be modified subsequently to use this new function. No functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Daniel Lezcano --- drivers/thermal/thermal_trip.c | 14 ++++++++++++++ include/linux/thermal.h | 3 +++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index a8e92a89b2b8..8c649a899537 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -27,6 +27,20 @@ int for_each_thermal_trip(struct thermal_zone_device *tz, } EXPORT_SYMBOL_GPL(for_each_thermal_trip); +int thermal_zone_for_each_trip(struct thermal_zone_device *tz, + int (*cb)(struct thermal_trip *, void *), + void *data) +{ + int ret; + + mutex_lock(&tz->lock); + ret = for_each_thermal_trip(tz, cb, data); + mutex_unlock(&tz->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(thermal_zone_for_each_trip); + int thermal_zone_get_num_trips(struct thermal_zone_device *tz) { return tz->num_trips; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 6710a4ace992..2bab72149bbf 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -286,6 +286,9 @@ int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id, int for_each_thermal_trip(struct thermal_zone_device *tz, int (*cb)(struct thermal_trip *, void *), void *data); +int thermal_zone_for_each_trip(struct thermal_zone_device *tz, + int (*cb)(struct thermal_trip *, void *), + void *data); int thermal_zone_get_num_trips(struct thermal_zone_device *tz); int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); -- cgit v1.2.3 From 4963e34ce7b95237021575d208fa576f88697839 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Oct 2023 15:25:33 +0200 Subject: thermal: core: Drop thermal_zone_device_exec() Because thermal_zone_device_exec() has no users any more and there are no plans to use it anywhere, revert commit 9a99a996d1ec ("thermal: core: Introduce thermal_zone_device_exec()") that introduced it. No functional impact. Signed-off-by: Rafael J. 
Wysocki Acked-by: Daniel Lezcano --- drivers/thermal/thermal_core.c | 19 ------------------- include/linux/thermal.h | 4 ---- 2 files changed, 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 45d0aa0b69b7..8ee22eb804d3 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -495,25 +495,6 @@ void thermal_zone_device_update(struct thermal_zone_device *tz, } EXPORT_SYMBOL_GPL(thermal_zone_device_update); -/** - * thermal_zone_device_exec - Run a callback under the zone lock. - * @tz: Thermal zone. - * @cb: Callback to run. - * @data: Data to pass to the callback. - */ -void thermal_zone_device_exec(struct thermal_zone_device *tz, - void (*cb)(struct thermal_zone_device *, - unsigned long), - unsigned long data) -{ - mutex_lock(&tz->lock); - - cb(tz, data); - - mutex_unlock(&tz->lock); -} -EXPORT_SYMBOL_GPL(thermal_zone_device_exec); - static void thermal_zone_device_check(struct work_struct *work) { struct thermal_zone_device *tz = container_of(work, struct diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 2bab72149bbf..c8600e313909 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -339,10 +339,6 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, enum thermal_notify_event); -void thermal_zone_device_exec(struct thermal_zone_device *tz, - void (*cb)(struct thermal_zone_device *, - unsigned long), - unsigned long data); struct thermal_cooling_device *thermal_cooling_device_register(const char *, void *, const struct thermal_cooling_device_ops *); -- cgit v1.2.3 From c62f5032f72a745542aa6a7e777c7819c96adfbe Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:01 +0100 Subject: comedi: comedi_8254: Use a call-back function for register access Rework the comedi_8254 module to use a call-back 
function for register access. This will make it easier to isolate the parts that will depend on the `CONFIG_HAS_IOPORT` macro being defined and also allows the possibility of supplying an external callback function during initialization by a variant of the `comedi_8254_init()` and `comedi_8254_mm_init()` functions, although that has not been implemented yet. The `struct comedi_8254` members have been changed to use a pointer to a callback function and a context of type `unsigned long`. The `comedi_8254_init()` and `comedi_8254_mm_init()` functions use an internal callback function and set the context to the base address of the registers (for `comedi_8254_mm_init()` that involves converting a `void __iomem *` to `unsigned long`). A minor change to `dio200_subdev_8254_offset()` in the amplc_dio200_common module has been made due to the changes in `struct comedi_8254`. Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-3-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/amplc_dio200_common.c | 4 +- drivers/comedi/drivers/comedi_8254.c | 177 +++++++++++++++++++-------- include/linux/comedi/comedi_8254.h | 22 +++- 3 files changed, 144 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/comedi/drivers/amplc_dio200_common.c b/drivers/comedi/drivers/amplc_dio200_common.c index ff651f2eb86c..2c1507a23f8a 100644 --- a/drivers/comedi/drivers/amplc_dio200_common.c +++ b/drivers/comedi/drivers/amplc_dio200_common.c @@ -149,9 +149,9 @@ static unsigned int dio200_subdev_8254_offset(struct comedi_device *dev, /* get the offset that was passed to comedi_8254_*_init() */ if (dev->mmio) - offset = i8254->mmio - dev->mmio; + offset = (void __iomem *)i8254->context - dev->mmio; else - offset = i8254->iobase - dev->iobase; + offset = i8254->context - dev->iobase; /* remove the shift that was added for PCIe boards */ if (board->is_pcie) diff --git 
a/drivers/comedi/drivers/comedi_8254.c b/drivers/comedi/drivers/comedi_8254.c index b4185c1b2695..3f8657fc7ee5 100644 --- a/drivers/comedi/drivers/comedi_8254.c +++ b/drivers/comedi/drivers/comedi_8254.c @@ -119,63 +119,101 @@ #include #include -static unsigned int __i8254_read(struct comedi_8254 *i8254, unsigned int reg) +static unsigned int i8254_io8_cb(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val) { - unsigned int reg_offset = (reg * i8254->iosize) << i8254->regshift; - unsigned int val; + unsigned long iobase = i8254->context; + unsigned int reg_offset = (reg * I8254_IO8) << i8254->regshift; - switch (i8254->iosize) { - default: - case I8254_IO8: - if (i8254->mmio) - val = readb(i8254->mmio + reg_offset); - else - val = inb(i8254->iobase + reg_offset); - break; - case I8254_IO16: - if (i8254->mmio) - val = readw(i8254->mmio + reg_offset); - else - val = inw(i8254->iobase + reg_offset); - break; - case I8254_IO32: - if (i8254->mmio) - val = readl(i8254->mmio + reg_offset); - else - val = inl(i8254->iobase + reg_offset); - break; + if (dir) { + outb(val, iobase + reg_offset); + return 0; + } else { + return inb(iobase + reg_offset); } - return val & 0xff; } -static void __i8254_write(struct comedi_8254 *i8254, - unsigned int val, unsigned int reg) +static unsigned int i8254_io16_cb(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val) { - unsigned int reg_offset = (reg * i8254->iosize) << i8254->regshift; + unsigned long iobase = i8254->context; + unsigned int reg_offset = (reg * I8254_IO16) << i8254->regshift; - switch (i8254->iosize) { - default: - case I8254_IO8: - if (i8254->mmio) - writeb(val, i8254->mmio + reg_offset); - else - outb(val, i8254->iobase + reg_offset); - break; - case I8254_IO16: - if (i8254->mmio) - writew(val, i8254->mmio + reg_offset); - else - outw(val, i8254->iobase + reg_offset); - break; - case I8254_IO32: - if (i8254->mmio) - writel(val, i8254->mmio + reg_offset); - else - outl(val, 
i8254->iobase + reg_offset); - break; + if (dir) { + outw(val, iobase + reg_offset); + return 0; + } else { + return inw(iobase + reg_offset); + } +} + +static unsigned int i8254_io32_cb(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val) +{ + unsigned long iobase = i8254->context; + unsigned int reg_offset = (reg * I8254_IO32) << i8254->regshift; + + if (dir) { + outl(val, iobase + reg_offset); + return 0; + } else { + return inl(iobase + reg_offset); + } +} + +static unsigned int i8254_mmio8_cb(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val) +{ + void __iomem *mmiobase = (void __iomem *)i8254->context; + unsigned int reg_offset = (reg * I8254_IO8) << i8254->regshift; + + if (dir) { + writeb(val, mmiobase + reg_offset); + return 0; + } else { + return readb(mmiobase + reg_offset); + } +} + +static unsigned int i8254_mmio16_cb(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val) +{ + void __iomem *mmiobase = (void __iomem *)i8254->context; + unsigned int reg_offset = (reg * I8254_IO16) << i8254->regshift; + + if (dir) { + writew(val, mmiobase + reg_offset); + return 0; + } else { + return readw(mmiobase + reg_offset); } } +static unsigned int i8254_mmio32_cb(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val) +{ + void __iomem *mmiobase = (void __iomem *)i8254->context; + unsigned int reg_offset = (reg * I8254_IO32) << i8254->regshift; + + if (dir) { + writel(val, mmiobase + reg_offset); + return 0; + } else { + return readl(mmiobase + reg_offset); + } +} + +static unsigned int __i8254_read(struct comedi_8254 *i8254, unsigned int reg) +{ + return 0xff & i8254->iocb(i8254, 0, reg, 0); +} + +static void __i8254_write(struct comedi_8254 *i8254, + unsigned int val, unsigned int reg) +{ + i8254->iocb(i8254, 1, reg, val); +} + /** * comedi_8254_status - return the status of a counter * @i8254: comedi_8254 struct for the timer @@ -571,8 +609,8 @@ void 
comedi_8254_subdevice_init(struct comedi_subdevice *s, } EXPORT_SYMBOL_GPL(comedi_8254_subdevice_init); -static struct comedi_8254 *__i8254_init(unsigned long iobase, - void __iomem *mmio, +static struct comedi_8254 *__i8254_init(comedi_8254_iocb_fn *iocb, + unsigned long context, unsigned int osc_base, unsigned int iosize, unsigned int regshift) @@ -585,12 +623,15 @@ static struct comedi_8254 *__i8254_init(unsigned long iobase, iosize == I8254_IO32)) return NULL; + if (!iocb) + return NULL; + i8254 = kzalloc(sizeof(*i8254), GFP_KERNEL); if (!i8254) return NULL; - i8254->iobase = iobase; - i8254->mmio = mmio; + i8254->iocb = iocb; + i8254->context = context; i8254->iosize = iosize; i8254->regshift = regshift; @@ -617,7 +658,22 @@ struct comedi_8254 *comedi_8254_init(unsigned long iobase, unsigned int iosize, unsigned int regshift) { - return __i8254_init(iobase, NULL, osc_base, iosize, regshift); + comedi_8254_iocb_fn *iocb; + + switch (iosize) { + case I8254_IO8: + iocb = i8254_io8_cb; + break; + case I8254_IO16: + iocb = i8254_io16_cb; + break; + case I8254_IO32: + iocb = i8254_io32_cb; + break; + default: + return NULL; + } + return __i8254_init(iocb, iobase, osc_base, iosize, regshift); } EXPORT_SYMBOL_GPL(comedi_8254_init); @@ -634,7 +690,22 @@ struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, unsigned int iosize, unsigned int regshift) { - return __i8254_init(0, mmio, osc_base, iosize, regshift); + comedi_8254_iocb_fn *iocb; + + switch (iosize) { + case I8254_IO8: + iocb = i8254_mmio8_cb; + break; + case I8254_IO16: + iocb = i8254_mmio16_cb; + break; + case I8254_IO32: + iocb = i8254_mmio32_cb; + break; + default: + return NULL; + } + return __i8254_init(iocb, (unsigned long)mmio, osc_base, iosize, regshift); } EXPORT_SYMBOL_GPL(comedi_8254_mm_init); diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h index d8264417e53c..18d12321c87d 100644 --- a/include/linux/comedi/comedi_8254.h +++ 
b/include/linux/comedi/comedi_8254.h @@ -57,10 +57,24 @@ struct comedi_subdevice; /* counter maps zero to 0x10000 */ #define I8254_MAX_COUNT 0x10000 +struct comedi_8254; + +/** + * typedef comedi_8254_iocb_fn - call-back function type for 8254 register access + * @i8254: pointer to struct comedi_8254 + * @dir: direction (0 = read, 1 = write) + * @reg: register number + * @val: value to write + * + * Return: Register value when reading, 0 when writing. + */ +typedef unsigned int comedi_8254_iocb_fn(struct comedi_8254 *i8254, int dir, + unsigned int reg, unsigned int val); + /** * struct comedi_8254 - private data used by this module - * @iobase: PIO base address of the registers (in/out) - * @mmio: MMIO base address of the registers (read/write) + * @iocb: I/O call-back function for register access + * @context: context for register access (e.g. a base address) * @iosize: I/O size used to access the registers (b/w/l) * @regshift: register gap shift * @osc_base: cascaded oscillator speed in ns @@ -76,8 +90,8 @@ struct comedi_subdevice; * @insn_config: driver specific (*insn_config) callback */ struct comedi_8254 { - unsigned long iobase; - void __iomem *mmio; + comedi_8254_iocb_fn *iocb; + unsigned long context; unsigned int iosize; unsigned int regshift; unsigned int osc_base; -- cgit v1.2.3 From fade5e5b0b2a2cc3855f64be6407b0bdcd837714 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:02 +0100 Subject: comedi: comedi_8254: Replace comedi_8254_init() and comedi_8254_mm_init() `comedi_8254_init()` and `comedi_8254_mm_init()` return `NULL` on failure, but the failure is not necessarily due to lack of memory. Change them to return an `ERR_PTR` value on failure and rename the functions to make it obvious the API has changed. `comedi_8254_init()` has been replaced with `comedi_8254_io_alloc()`, and `comedi_8254_mm_init()` has been replaced with `comedi_8254_mm_alloc()`. 
Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-4-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers.c | 3 +- drivers/comedi/drivers/adl_pci9111.c | 8 ++-- drivers/comedi/drivers/adl_pci9118.c | 8 ++-- drivers/comedi/drivers/adv_pci1710.c | 8 ++-- drivers/comedi/drivers/adv_pci_dio.c | 10 ++--- drivers/comedi/drivers/aio_aio12_8.c | 8 ++-- drivers/comedi/drivers/amplc_dio200_common.c | 12 +++--- drivers/comedi/drivers/amplc_pci224.c | 8 ++-- drivers/comedi/drivers/amplc_pci230.c | 8 ++-- drivers/comedi/drivers/cb_das16_cs.c | 8 ++-- drivers/comedi/drivers/cb_pcidas.c | 21 ++++++----- drivers/comedi/drivers/cb_pcimdas.c | 10 ++--- drivers/comedi/drivers/comedi_8254.c | 55 ++++++++++++++++------------ drivers/comedi/drivers/das08.c | 9 +++-- drivers/comedi/drivers/das16.c | 8 ++-- drivers/comedi/drivers/das16m1.c | 20 +++++----- drivers/comedi/drivers/das1800.c | 8 ++-- drivers/comedi/drivers/das6402.c | 8 ++-- drivers/comedi/drivers/das800.c | 8 ++-- drivers/comedi/drivers/me4000.c | 6 +-- drivers/comedi/drivers/ni_at_a2150.c | 8 ++-- drivers/comedi/drivers/ni_at_ao.c | 8 ++-- drivers/comedi/drivers/ni_labpc_common.c | 38 ++++++++++--------- drivers/comedi/drivers/pcl711.c | 8 ++-- drivers/comedi/drivers/pcl812.c | 10 ++--- drivers/comedi/drivers/pcl816.c | 8 ++-- drivers/comedi/drivers/pcl818.c | 8 ++-- drivers/comedi/drivers/rtd520.c | 6 +-- include/linux/comedi/comedi_8254.h | 16 ++++---- 29 files changed, 179 insertions(+), 165 deletions(-) (limited to 'include/linux') diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c index d4e2ed709bfc..376130bfba8a 100644 --- a/drivers/comedi/drivers.c +++ b/drivers/comedi/drivers.c @@ -177,7 +177,8 @@ static void comedi_device_detach_cleanup(struct comedi_device *dev) dev->n_subdevices = 0; } kfree(dev->private); - kfree(dev->pacer); + if (!IS_ERR(dev->pacer)) + kfree(dev->pacer); dev->private = NULL; dev->pacer = 
NULL; dev->driver = NULL; diff --git a/drivers/comedi/drivers/adl_pci9111.c b/drivers/comedi/drivers/adl_pci9111.c index c50f94272a74..086d93f40cb9 100644 --- a/drivers/comedi/drivers/adl_pci9111.c +++ b/drivers/comedi/drivers/adl_pci9111.c @@ -647,10 +647,10 @@ static int pci9111_auto_attach(struct comedi_device *dev, dev->irq = pcidev->irq; } - dev->pacer = comedi_8254_init(dev->iobase + PCI9111_8254_BASE_REG, - I8254_OSC_BASE_2MHZ, I8254_IO16, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + PCI9111_8254_BASE_REG, + I8254_OSC_BASE_2MHZ, I8254_IO16, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 4); if (ret) diff --git a/drivers/comedi/drivers/adl_pci9118.c b/drivers/comedi/drivers/adl_pci9118.c index 9a816c718303..a76e2666d583 100644 --- a/drivers/comedi/drivers/adl_pci9118.c +++ b/drivers/comedi/drivers/adl_pci9118.c @@ -1524,10 +1524,10 @@ static int pci9118_common_attach(struct comedi_device *dev, devpriv->iobase_a = pci_resource_start(pcidev, 0); dev->iobase = pci_resource_start(pcidev, 2); - dev->pacer = comedi_8254_init(dev->iobase + PCI9118_TIMER_BASE, - I8254_OSC_BASE_4MHZ, I8254_IO32, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + PCI9118_TIMER_BASE, + I8254_OSC_BASE_4MHZ, I8254_IO32, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); pci9118_reset(dev); diff --git a/drivers/comedi/drivers/adv_pci1710.c b/drivers/comedi/drivers/adv_pci1710.c index 4f2639968260..c49b0f1f5228 100644 --- a/drivers/comedi/drivers/adv_pci1710.c +++ b/drivers/comedi/drivers/adv_pci1710.c @@ -767,10 +767,10 @@ static int pci1710_auto_attach(struct comedi_device *dev, return ret; dev->iobase = pci_resource_start(pcidev, 2); - dev->pacer = comedi_8254_init(dev->iobase + PCI171X_TIMER_BASE, - I8254_OSC_BASE_10MHZ, I8254_IO16, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + PCI171X_TIMER_BASE, + 
I8254_OSC_BASE_10MHZ, I8254_IO16, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); n_subdevices = 1; /* all boards have analog inputs */ if (board->has_ao) diff --git a/drivers/comedi/drivers/adv_pci_dio.c b/drivers/comedi/drivers/adv_pci_dio.c index efa3e46b554b..0319d8c7ee47 100644 --- a/drivers/comedi/drivers/adv_pci_dio.c +++ b/drivers/comedi/drivers/adv_pci_dio.c @@ -664,11 +664,11 @@ static int pci_dio_auto_attach(struct comedi_device *dev, if (board->timer_regbase) { s = &dev->subdevices[subdev++]; - dev->pacer = comedi_8254_init(dev->iobase + - board->timer_regbase, - 0, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = + comedi_8254_io_alloc(dev->iobase + board->timer_regbase, + 0, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); comedi_8254_subdevice_init(s, dev->pacer); } diff --git a/drivers/comedi/drivers/aio_aio12_8.c b/drivers/comedi/drivers/aio_aio12_8.c index 30b8a32204d8..f9d40fa3d3a9 100644 --- a/drivers/comedi/drivers/aio_aio12_8.c +++ b/drivers/comedi/drivers/aio_aio12_8.c @@ -206,10 +206,10 @@ static int aio_aio12_8_attach(struct comedi_device *dev, if (ret) return ret; - dev->pacer = comedi_8254_init(dev->iobase + AIO12_8_8254_BASE_REG, - 0, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + AIO12_8_8254_BASE_REG, + 0, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 4); if (ret) diff --git a/drivers/comedi/drivers/amplc_dio200_common.c b/drivers/comedi/drivers/amplc_dio200_common.c index 2c1507a23f8a..19166cb26f5e 100644 --- a/drivers/comedi/drivers/amplc_dio200_common.c +++ b/drivers/comedi/drivers/amplc_dio200_common.c @@ -556,14 +556,14 @@ static int dio200_subdev_8254_init(struct comedi_device *dev, } if (dev->mmio) { - i8254 = comedi_8254_mm_init(dev->mmio + offset, - 0, I8254_IO8, regshift); + i8254 = comedi_8254_mm_alloc(dev->mmio + offset, + 0, I8254_IO8, regshift); } else { - 
i8254 = comedi_8254_init(dev->iobase + offset, - 0, I8254_IO8, regshift); + i8254 = comedi_8254_io_alloc(dev->iobase + offset, + 0, I8254_IO8, regshift); } - if (!i8254) - return -ENOMEM; + if (IS_ERR(i8254)) + return PTR_ERR(i8254); comedi_8254_subdevice_init(s, i8254); diff --git a/drivers/comedi/drivers/amplc_pci224.c b/drivers/comedi/drivers/amplc_pci224.c index 5a04e55daeea..1373637c2ca2 100644 --- a/drivers/comedi/drivers/amplc_pci224.c +++ b/drivers/comedi/drivers/amplc_pci224.c @@ -1051,10 +1051,10 @@ pci224_auto_attach(struct comedi_device *dev, unsigned long context_model) outw(devpriv->daccon | PCI224_DACCON_FIFORESET, dev->iobase + PCI224_DACCON); - dev->pacer = comedi_8254_init(devpriv->iobase1 + PCI224_Z2_BASE, - I8254_OSC_BASE_10MHZ, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(devpriv->iobase1 + PCI224_Z2_BASE, + I8254_OSC_BASE_10MHZ, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 1); if (ret) diff --git a/drivers/comedi/drivers/amplc_pci230.c b/drivers/comedi/drivers/amplc_pci230.c index 92ba8b8c0172..783da73877b9 100644 --- a/drivers/comedi/drivers/amplc_pci230.c +++ b/drivers/comedi/drivers/amplc_pci230.c @@ -2475,10 +2475,10 @@ static int pci230_auto_attach(struct comedi_device *dev, dev->irq = pci_dev->irq; } - dev->pacer = comedi_8254_init(dev->iobase + PCI230_Z2_CT_BASE, - 0, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + PCI230_Z2_CT_BASE, + 0, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); rc = comedi_alloc_subdevices(dev, 3); if (rc) diff --git a/drivers/comedi/drivers/cb_das16_cs.c b/drivers/comedi/drivers/cb_das16_cs.c index 8e0d2fa5f95d..306208a0695b 100644 --- a/drivers/comedi/drivers/cb_das16_cs.c +++ b/drivers/comedi/drivers/cb_das16_cs.c @@ -363,10 +363,10 @@ static int das16cs_auto_attach(struct comedi_device *dev, if (!devpriv) return -ENOMEM; - 
dev->pacer = comedi_8254_init(dev->iobase + DAS16CS_TIMER_BASE, - I8254_OSC_BASE_10MHZ, I8254_IO16, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + DAS16CS_TIMER_BASE, + I8254_OSC_BASE_10MHZ, I8254_IO16, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 4); if (ret) diff --git a/drivers/comedi/drivers/cb_pcidas.c b/drivers/comedi/drivers/cb_pcidas.c index 0c7576b967fc..7a6cd681e932 100644 --- a/drivers/comedi/drivers/cb_pcidas.c +++ b/drivers/comedi/drivers/cb_pcidas.c @@ -1288,16 +1288,16 @@ static int cb_pcidas_auto_attach(struct comedi_device *dev, } dev->irq = pcidev->irq; - dev->pacer = comedi_8254_init(dev->iobase + PCIDAS_AI_8254_BASE, - I8254_OSC_BASE_10MHZ, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + PCIDAS_AI_8254_BASE, + I8254_OSC_BASE_10MHZ, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); - devpriv->ao_pacer = comedi_8254_init(dev->iobase + PCIDAS_AO_8254_BASE, - I8254_OSC_BASE_10MHZ, - I8254_IO8, 0); - if (!devpriv->ao_pacer) - return -ENOMEM; + devpriv->ao_pacer = + comedi_8254_io_alloc(dev->iobase + PCIDAS_AO_8254_BASE, + I8254_OSC_BASE_10MHZ, I8254_IO8, 0); + if (IS_ERR(devpriv->ao_pacer)) + return PTR_ERR(devpriv->ao_pacer); ret = comedi_alloc_subdevices(dev, 7); if (ret) @@ -1453,7 +1453,8 @@ static void cb_pcidas_detach(struct comedi_device *dev) if (devpriv->amcc) outl(INTCSR_INBOX_INTR_STATUS, devpriv->amcc + AMCC_OP_REG_INTCSR); - kfree(devpriv->ao_pacer); + if (!IS_ERR(devpriv->ao_pacer)) + kfree(devpriv->ao_pacer); } comedi_pci_detach(dev); } diff --git a/drivers/comedi/drivers/cb_pcimdas.c b/drivers/comedi/drivers/cb_pcimdas.c index 8bdb00774f11..5816ef65ed5f 100644 --- a/drivers/comedi/drivers/cb_pcimdas.c +++ b/drivers/comedi/drivers/cb_pcimdas.c @@ -364,11 +364,11 @@ static int cb_pcimdas_auto_attach(struct comedi_device *dev, devpriv->BADR3 = pci_resource_start(pcidev, 
3); dev->iobase = pci_resource_start(pcidev, 4); - dev->pacer = comedi_8254_init(devpriv->BADR3 + PCIMDAS_8254_BASE, - cb_pcimdas_pacer_clk(dev), - I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(devpriv->BADR3 + PCIMDAS_8254_BASE, + cb_pcimdas_pacer_clk(dev), + I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 6); if (ret) diff --git a/drivers/comedi/drivers/comedi_8254.c b/drivers/comedi/drivers/comedi_8254.c index 3f8657fc7ee5..696596944506 100644 --- a/drivers/comedi/drivers/comedi_8254.c +++ b/drivers/comedi/drivers/comedi_8254.c @@ -24,14 +24,17 @@ * * This module provides the following basic functions: * - * comedi_8254_init() / comedi_8254_mm_init() + * comedi_8254_io_alloc() / comedi_8254_mm_alloc() * Initializes this module to access the 8254 registers. The _mm version - * sets up the module for MMIO register access the other for PIO access. - * The pointer returned from these functions is normally stored in the - * comedi_device dev->pacer and will be freed by the comedi core during - * the driver (*detach). If a driver has multiple 8254 devices, they need - * to be stored in the drivers private data and freed when the driver is - * detached. + * sets up the module for MMIO register access; the _io version sets it + * up for PIO access. These functions return a pointer to a struct + * comedi_8254 on success, or an ERR_PTR value on failure. The pointer + * returned from these functions is normally stored in the comedi_device + * dev->pacer and will be freed by the comedi core during the driver + * (*detach). If a driver has multiple 8254 devices, they need to be + * stored in the drivers private data and freed when the driver is + * detached. If the ERR_PTR value is stored, code should check the + * pointer value with !IS_ERR(pointer) before freeing. * * NOTE: The counters are reset by setting them to I8254_MODE0 as part of * this initialization. 
@@ -621,14 +624,14 @@ static struct comedi_8254 *__i8254_init(comedi_8254_iocb_fn *iocb, /* sanity check that the iosize is valid */ if (!(iosize == I8254_IO8 || iosize == I8254_IO16 || iosize == I8254_IO32)) - return NULL; + return ERR_PTR(-EINVAL); if (!iocb) - return NULL; + return ERR_PTR(-EINVAL); i8254 = kzalloc(sizeof(*i8254), GFP_KERNEL); if (!i8254) - return NULL; + return ERR_PTR(-ENOMEM); i8254->iocb = iocb; i8254->context = context; @@ -646,17 +649,19 @@ static struct comedi_8254 *__i8254_init(comedi_8254_iocb_fn *iocb, } /** - * comedi_8254_init - allocate and initialize the 8254 device for pio access + * comedi_8254_io_alloc - allocate and initialize the 8254 device for pio access * @iobase: port I/O base address * @osc_base: base time of the counter in ns * OPTIONAL - only used by comedi_8254_cascade_ns_to_timer() * @iosize: I/O register size * @regshift: register gap shift + * + * Return: A pointer to a struct comedi_8254 or an ERR_PTR value. */ -struct comedi_8254 *comedi_8254_init(unsigned long iobase, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift) +struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift) { comedi_8254_iocb_fn *iocb; @@ -671,24 +676,26 @@ struct comedi_8254 *comedi_8254_init(unsigned long iobase, iocb = i8254_io32_cb; break; default: - return NULL; + return ERR_PTR(-EINVAL); } return __i8254_init(iocb, iobase, osc_base, iosize, regshift); } -EXPORT_SYMBOL_GPL(comedi_8254_init); +EXPORT_SYMBOL_GPL(comedi_8254_io_alloc); /** - * comedi_8254_mm_init - allocate and initialize the 8254 device for mmio access + * comedi_8254_mm_alloc - allocate and initialize the 8254 device for mmio access * @mmio: memory mapped I/O base address * @osc_base: base time of the counter in ns * OPTIONAL - only used by comedi_8254_cascade_ns_to_timer() * @iosize: I/O register size * @regshift: register gap shift + * + * Return: A pointer to a struct 
comedi_8254 or an ERR_PTR value. */ -struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift) +struct comedi_8254 *comedi_8254_mm_alloc(void __iomem *mmio, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift) { comedi_8254_iocb_fn *iocb; @@ -703,11 +710,11 @@ struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, iocb = i8254_mmio32_cb; break; default: - return NULL; + return ERR_PTR(-EINVAL); } return __i8254_init(iocb, (unsigned long)mmio, osc_base, iosize, regshift); } -EXPORT_SYMBOL_GPL(comedi_8254_mm_init); +EXPORT_SYMBOL_GPL(comedi_8254_mm_alloc); static int __init comedi_8254_module_init(void) { diff --git a/drivers/comedi/drivers/das08.c b/drivers/comedi/drivers/das08.c index f8ab3af2e391..6a3b5411aa90 100644 --- a/drivers/comedi/drivers/das08.c +++ b/drivers/comedi/drivers/das08.c @@ -439,10 +439,11 @@ int das08_common_attach(struct comedi_device *dev, unsigned long iobase) /* Counter subdevice (8254) */ s = &dev->subdevices[5]; if (board->i8254_offset) { - dev->pacer = comedi_8254_init(dev->iobase + board->i8254_offset, - 0, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = + comedi_8254_io_alloc(dev->iobase + board->i8254_offset, + 0, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); comedi_8254_subdevice_init(s, dev->pacer); } else { diff --git a/drivers/comedi/drivers/das16.c b/drivers/comedi/drivers/das16.c index 728dc02156c8..bfe8811be1b5 100644 --- a/drivers/comedi/drivers/das16.c +++ b/drivers/comedi/drivers/das16.c @@ -1067,10 +1067,10 @@ static int das16_attach(struct comedi_device *dev, struct comedi_devconfig *it) osc_base = I8254_OSC_BASE_1MHZ / it->options[3]; } - dev->pacer = comedi_8254_init(dev->iobase + DAS16_TIMER_BASE_REG, - osc_base, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + DAS16_TIMER_BASE_REG, + osc_base, I8254_IO8, 0); + if 
(IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); das16_alloc_dma(dev, it->options[2]); diff --git a/drivers/comedi/drivers/das16m1.c b/drivers/comedi/drivers/das16m1.c index 275effb77746..ff9c5a8897bd 100644 --- a/drivers/comedi/drivers/das16m1.c +++ b/drivers/comedi/drivers/das16m1.c @@ -529,15 +529,16 @@ static int das16m1_attach(struct comedi_device *dev, dev->irq = it->options[1]; } - dev->pacer = comedi_8254_init(dev->iobase + DAS16M1_8254_IOBASE2, - I8254_OSC_BASE_10MHZ, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + DAS16M1_8254_IOBASE2, + I8254_OSC_BASE_10MHZ, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); - devpriv->counter = comedi_8254_init(dev->iobase + DAS16M1_8254_IOBASE1, - 0, I8254_IO8, 0); - if (!devpriv->counter) - return -ENOMEM; + devpriv->counter = + comedi_8254_io_alloc(dev->iobase + DAS16M1_8254_IOBASE1, + 0, I8254_IO8, 0); + if (IS_ERR(devpriv->counter)) + return PTR_ERR(devpriv->counter); ret = comedi_alloc_subdevices(dev, 4); if (ret) @@ -603,7 +604,8 @@ static void das16m1_detach(struct comedi_device *dev) if (devpriv) { if (devpriv->extra_iobase) release_region(devpriv->extra_iobase, DAS16M1_SIZE2); - kfree(devpriv->counter); + if (!IS_ERR(devpriv->counter)) + kfree(devpriv->counter); } comedi_legacy_detach(dev); } diff --git a/drivers/comedi/drivers/das1800.c b/drivers/comedi/drivers/das1800.c index f09608c0f4ff..7117c67aee7e 100644 --- a/drivers/comedi/drivers/das1800.c +++ b/drivers/comedi/drivers/das1800.c @@ -1233,10 +1233,10 @@ static int das1800_attach(struct comedi_device *dev, if (!devpriv->fifo_buf) return -ENOMEM; - dev->pacer = comedi_8254_init(dev->iobase + DAS1800_COUNTER, - I8254_OSC_BASE_5MHZ, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + DAS1800_COUNTER, + I8254_OSC_BASE_5MHZ, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 4); 
if (ret) diff --git a/drivers/comedi/drivers/das6402.c b/drivers/comedi/drivers/das6402.c index 1af394591e74..68f95330de45 100644 --- a/drivers/comedi/drivers/das6402.c +++ b/drivers/comedi/drivers/das6402.c @@ -590,10 +590,10 @@ static int das6402_attach(struct comedi_device *dev, } } - dev->pacer = comedi_8254_init(dev->iobase + DAS6402_TIMER_BASE, - I8254_OSC_BASE_10MHZ, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + DAS6402_TIMER_BASE, + I8254_OSC_BASE_10MHZ, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 4); if (ret) diff --git a/drivers/comedi/drivers/das800.c b/drivers/comedi/drivers/das800.c index 4ca33f46eaa7..300775523031 100644 --- a/drivers/comedi/drivers/das800.c +++ b/drivers/comedi/drivers/das800.c @@ -672,10 +672,10 @@ static int das800_attach(struct comedi_device *dev, struct comedi_devconfig *it) dev->irq = irq; } - dev->pacer = comedi_8254_init(dev->iobase + DAS800_8254, - I8254_OSC_BASE_1MHZ, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + DAS800_8254, + I8254_OSC_BASE_1MHZ, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 3); if (ret) diff --git a/drivers/comedi/drivers/me4000.c b/drivers/comedi/drivers/me4000.c index 9aea02b86ed9..7dd3a0071863 100644 --- a/drivers/comedi/drivers/me4000.c +++ b/drivers/comedi/drivers/me4000.c @@ -1209,9 +1209,9 @@ static int me4000_auto_attach(struct comedi_device *dev, if (!timer_base) return -ENODEV; - dev->pacer = comedi_8254_init(timer_base, 0, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(timer_base, 0, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); comedi_8254_subdevice_init(s, dev->pacer); } else { diff --git a/drivers/comedi/drivers/ni_at_a2150.c b/drivers/comedi/drivers/ni_at_a2150.c index df8d219e6723..e4e5a0ebd195 
100644 --- a/drivers/comedi/drivers/ni_at_a2150.c +++ b/drivers/comedi/drivers/ni_at_a2150.c @@ -707,10 +707,10 @@ static int a2150_attach(struct comedi_device *dev, struct comedi_devconfig *it) /* an IRQ and DMA are required to support async commands */ a2150_alloc_irq_and_dma(dev, it); - dev->pacer = comedi_8254_init(dev->iobase + I8253_BASE_REG, - 0, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + I8253_BASE_REG, + 0, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 1); if (ret) diff --git a/drivers/comedi/drivers/ni_at_ao.c b/drivers/comedi/drivers/ni_at_ao.c index 9f3147b72aa8..9cf6b4ff6b65 100644 --- a/drivers/comedi/drivers/ni_at_ao.c +++ b/drivers/comedi/drivers/ni_at_ao.c @@ -303,10 +303,10 @@ static int atao_attach(struct comedi_device *dev, struct comedi_devconfig *it) if (!devpriv) return -ENOMEM; - dev->pacer = comedi_8254_init(dev->iobase + ATAO_82C53_BASE, - 0, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + ATAO_82C53_BASE, + 0, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 4); if (ret) diff --git a/drivers/comedi/drivers/ni_labpc_common.c b/drivers/comedi/drivers/ni_labpc_common.c index 763249653228..eb8f6431276a 100644 --- a/drivers/comedi/drivers/ni_labpc_common.c +++ b/drivers/comedi/drivers/ni_labpc_common.c @@ -1222,24 +1222,24 @@ int labpc_common_attach(struct comedi_device *dev, } if (dev->mmio) { - dev->pacer = comedi_8254_mm_init(dev->mmio + COUNTER_B_BASE_REG, - I8254_OSC_BASE_2MHZ, - I8254_IO8, 0); - devpriv->counter = comedi_8254_mm_init(dev->mmio + - COUNTER_A_BASE_REG, - I8254_OSC_BASE_2MHZ, - I8254_IO8, 0); + dev->pacer = + comedi_8254_mm_alloc(dev->mmio + COUNTER_B_BASE_REG, + I8254_OSC_BASE_2MHZ, I8254_IO8, 0); + devpriv->counter = + comedi_8254_mm_alloc(dev->mmio + COUNTER_A_BASE_REG, + I8254_OSC_BASE_2MHZ, 
I8254_IO8, 0); } else { - dev->pacer = comedi_8254_init(dev->iobase + COUNTER_B_BASE_REG, - I8254_OSC_BASE_2MHZ, - I8254_IO8, 0); - devpriv->counter = comedi_8254_init(dev->iobase + - COUNTER_A_BASE_REG, - I8254_OSC_BASE_2MHZ, - I8254_IO8, 0); + dev->pacer = + comedi_8254_io_alloc(dev->iobase + COUNTER_B_BASE_REG, + I8254_OSC_BASE_2MHZ, I8254_IO8, 0); + devpriv->counter = + comedi_8254_io_alloc(dev->iobase + COUNTER_A_BASE_REG, + I8254_OSC_BASE_2MHZ, I8254_IO8, 0); } - if (!dev->pacer || !devpriv->counter) - return -ENOMEM; + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); + if (IS_ERR(devpriv->counter)) + return PTR_ERR(devpriv->counter); ret = comedi_alloc_subdevices(dev, 5); if (ret) @@ -1341,8 +1341,10 @@ void labpc_common_detach(struct comedi_device *dev) { struct labpc_private *devpriv = dev->private; - if (devpriv) - kfree(devpriv->counter); + if (devpriv) { + if (!IS_ERR(devpriv->counter)) + kfree(devpriv->counter); + } } EXPORT_SYMBOL_GPL(labpc_common_detach); diff --git a/drivers/comedi/drivers/pcl711.c b/drivers/comedi/drivers/pcl711.c index 05172c553c8a..0cf3917defe7 100644 --- a/drivers/comedi/drivers/pcl711.c +++ b/drivers/comedi/drivers/pcl711.c @@ -429,10 +429,10 @@ static int pcl711_attach(struct comedi_device *dev, struct comedi_devconfig *it) dev->irq = it->options[1]; } - dev->pacer = comedi_8254_init(dev->iobase + PCL711_TIMER_BASE, - I8254_OSC_BASE_2MHZ, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + PCL711_TIMER_BASE, + I8254_OSC_BASE_2MHZ, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 4); if (ret) diff --git a/drivers/comedi/drivers/pcl812.c b/drivers/comedi/drivers/pcl812.c index 70dbc129fcf5..0df639c6a595 100644 --- a/drivers/comedi/drivers/pcl812.c +++ b/drivers/comedi/drivers/pcl812.c @@ -1143,11 +1143,11 @@ static int pcl812_attach(struct comedi_device *dev, struct comedi_devconfig *it) return ret; if 
(board->irq_bits) { - dev->pacer = comedi_8254_init(dev->iobase + PCL812_TIMER_BASE, - I8254_OSC_BASE_2MHZ, - I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = + comedi_8254_io_alloc(dev->iobase + PCL812_TIMER_BASE, + I8254_OSC_BASE_2MHZ, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); if ((1 << it->options[1]) & board->irq_bits) { ret = request_irq(it->options[1], pcl812_interrupt, 0, diff --git a/drivers/comedi/drivers/pcl816.c b/drivers/comedi/drivers/pcl816.c index a5e5320be648..28d1a88c50f6 100644 --- a/drivers/comedi/drivers/pcl816.c +++ b/drivers/comedi/drivers/pcl816.c @@ -615,10 +615,10 @@ static int pcl816_attach(struct comedi_device *dev, struct comedi_devconfig *it) /* an IRQ and DMA are required to support async commands */ pcl816_alloc_irq_and_dma(dev, it); - dev->pacer = comedi_8254_init(dev->iobase + PCL816_TIMER_BASE, - I8254_OSC_BASE_10MHZ, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + PCL816_TIMER_BASE, + I8254_OSC_BASE_10MHZ, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); ret = comedi_alloc_subdevices(dev, 4); if (ret) diff --git a/drivers/comedi/drivers/pcl818.c b/drivers/comedi/drivers/pcl818.c index 29e503de8267..4127adcfb229 100644 --- a/drivers/comedi/drivers/pcl818.c +++ b/drivers/comedi/drivers/pcl818.c @@ -1015,10 +1015,10 @@ static int pcl818_attach(struct comedi_device *dev, struct comedi_devconfig *it) else osc_base = I8254_OSC_BASE_1MHZ; - dev->pacer = comedi_8254_init(dev->iobase + PCL818_TIMER_BASE, - osc_base, I8254_IO8, 0); - if (!dev->pacer) - return -ENOMEM; + dev->pacer = comedi_8254_io_alloc(dev->iobase + PCL818_TIMER_BASE, + osc_base, I8254_IO8, 0); + if (IS_ERR(dev->pacer)) + return PTR_ERR(dev->pacer); /* max sampling speed */ devpriv->ns_min = board->ns_min; diff --git a/drivers/comedi/drivers/rtd520.c b/drivers/comedi/drivers/rtd520.c index 7e0ec1a2a2ca..44bb0decd7a4 100644 --- 
a/drivers/comedi/drivers/rtd520.c +++ b/drivers/comedi/drivers/rtd520.c @@ -1289,9 +1289,9 @@ static int rtd_auto_attach(struct comedi_device *dev, /* 8254 Timer/Counter subdevice */ s = &dev->subdevices[3]; - dev->pacer = comedi_8254_mm_init(dev->mmio + LAS0_8254_TIMER_BASE, - RTD_CLOCK_BASE, I8254_IO8, 2); - if (!dev->pacer) + dev->pacer = comedi_8254_mm_alloc(dev->mmio + LAS0_8254_TIMER_BASE, + RTD_CLOCK_BASE, I8254_IO8, 2); + if (IS_ERR(dev->pacer)) return -ENOMEM; comedi_8254_subdevice_init(s, dev->pacer); diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h index 18d12321c87d..393ccb301028 100644 --- a/include/linux/comedi/comedi_8254.h +++ b/include/linux/comedi/comedi_8254.h @@ -136,13 +136,13 @@ void comedi_8254_set_busy(struct comedi_8254 *i8254, void comedi_8254_subdevice_init(struct comedi_subdevice *s, struct comedi_8254 *i8254); -struct comedi_8254 *comedi_8254_init(unsigned long iobase, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); -struct comedi_8254 *comedi_8254_mm_init(void __iomem *mmio, - unsigned int osc_base, - unsigned int iosize, - unsigned int regshift); +struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); +struct comedi_8254 *comedi_8254_mm_alloc(void __iomem *mmio, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift); #endif /* _COMEDI_8254_H */ -- cgit v1.2.3 From 90d256757e0bffd7a9beafd7c2bdc40a0236f9ec Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:03 +0100 Subject: comedi: comedi_8254: Conditionally remove I/O port support The comedi_8254 module supports both port I/O and memory-mapped I/O. In a future patch, the port I/O functions (`inb()`, `outb()`, and friends) will only be declared if the `HAS_IOPORT` configuration option is enabled. 
Conditionally compile the parts of the module that use port I/O so they are compiled if and only if the `CONFIG_HAS_IOPORT` macro is defined, so that it can still be built if the port I/O functions have not been declared. If `CONFIG_HAS_IOPORT` is undefined, replace the GPL-exported `comedi_8254_io_alloc()` function with a dummy static inline version that just returns `ERR_PTR(-ENXIO)`. Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-5-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/comedi_8254.c | 8 ++++++++ include/linux/comedi/comedi_8254.h | 13 +++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/comedi/drivers/comedi_8254.c b/drivers/comedi/drivers/comedi_8254.c index 696596944506..6beca2a6d66e 100644 --- a/drivers/comedi/drivers/comedi_8254.c +++ b/drivers/comedi/drivers/comedi_8254.c @@ -122,6 +122,8 @@ #include #include +#ifdef CONFIG_HAS_IOPORT + static unsigned int i8254_io8_cb(struct comedi_8254 *i8254, int dir, unsigned int reg, unsigned int val) { @@ -164,6 +166,8 @@ static unsigned int i8254_io32_cb(struct comedi_8254 *i8254, int dir, } } +#endif /* CONFIG_HAS_IOPORT */ + static unsigned int i8254_mmio8_cb(struct comedi_8254 *i8254, int dir, unsigned int reg, unsigned int val) { @@ -648,6 +652,8 @@ static struct comedi_8254 *__i8254_init(comedi_8254_iocb_fn *iocb, return i8254; } +#ifdef CONFIG_HAS_IOPORT + /** * comedi_8254_io_alloc - allocate and initialize the 8254 device for pio access * @iobase: port I/O base address @@ -682,6 +688,8 @@ struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, } EXPORT_SYMBOL_GPL(comedi_8254_io_alloc); +#endif /* CONFIG_HAS_IOPORT */ + /** * comedi_8254_mm_alloc - allocate and initialize the 8254 device for mmio access * @mmio: memory mapped I/O base address diff --git a/include/linux/comedi/comedi_8254.h b/include/linux/comedi/comedi_8254.h index 
393ccb301028..d527f04400df 100644 --- a/include/linux/comedi/comedi_8254.h +++ b/include/linux/comedi/comedi_8254.h @@ -12,6 +12,8 @@ #define _COMEDI_8254_H #include +#include +#include struct comedi_device; struct comedi_insn; @@ -136,10 +138,21 @@ void comedi_8254_set_busy(struct comedi_8254 *i8254, void comedi_8254_subdevice_init(struct comedi_subdevice *s, struct comedi_8254 *i8254); +#ifdef CONFIG_HAS_IOPORT struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, unsigned int osc_base, unsigned int iosize, unsigned int regshift); +#else +static inline struct comedi_8254 *comedi_8254_io_alloc(unsigned long iobase, + unsigned int osc_base, + unsigned int iosize, + unsigned int regshift) +{ + return ERR_PTR(-ENXIO); +} +#endif + struct comedi_8254 *comedi_8254_mm_alloc(void __iomem *mmio, unsigned int osc_base, unsigned int iosize, -- cgit v1.2.3 From 5c57b1ccecc72738d4b9be2dfcdfb9001be76bd7 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:05 +0100 Subject: comedi: comedi_8255: Rework subdevice initialization functions Comedi drivers can initialize an 8255 subdevice in I/O space by calling `subdev_8255_init()`, or in memory-mapped I/O space by calling `subdev_8255_mm_init()`, or by supplying a call-back function pointer and context to either of those functions. Change it so that a new function `subdev_8255_cb_init()` shall be called instead when supplying a callback function and context, and remove the call-back function parameter from `subdev_8255_init()` and `subdev_8255_mm_init()`. Also rename `subdev_8255_init()` to `subdev_8255_io_init()`. The parameters are changing, so might as well rename it at the same time. Also rename the `regbase` member of `struct subdev_8255_private` to `context` since this holds the context for the call-back function call. 
Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-7-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/8255.c | 2 +- drivers/comedi/drivers/8255_pci.c | 4 +- drivers/comedi/drivers/adv_pci_dio.c | 4 +- drivers/comedi/drivers/aio_aio12_8.c | 2 +- drivers/comedi/drivers/amplc_pc236_common.c | 2 +- drivers/comedi/drivers/amplc_pci230.c | 2 +- drivers/comedi/drivers/cb_pcidas.c | 2 +- drivers/comedi/drivers/cb_pcidas64.c | 7 +- drivers/comedi/drivers/cb_pcidda.c | 2 +- drivers/comedi/drivers/cb_pcimdas.c | 2 +- drivers/comedi/drivers/cb_pcimdda.c | 2 +- drivers/comedi/drivers/comedi_8255.c | 115 +++++++++++++--------------- drivers/comedi/drivers/daqboard2000.c | 4 +- drivers/comedi/drivers/das08.c | 2 +- drivers/comedi/drivers/das16.c | 2 +- drivers/comedi/drivers/das16m1.c | 2 +- drivers/comedi/drivers/dmm32at.c | 3 +- drivers/comedi/drivers/ni_atmio16d.c | 2 +- drivers/comedi/drivers/ni_daq_dio24.c | 2 +- drivers/comedi/drivers/ni_labpc_common.c | 4 +- drivers/comedi/drivers/ni_mio_common.c | 4 +- drivers/comedi/drivers/pcl724.c | 6 +- drivers/comedi/drivers/pcm3724.c | 2 +- include/linux/comedi/comedi_8255.h | 13 ++-- 24 files changed, 92 insertions(+), 100 deletions(-) (limited to 'include/linux') diff --git a/drivers/comedi/drivers/8255.c b/drivers/comedi/drivers/8255.c index ced8ea09d4fa..f45f7bd1c61a 100644 --- a/drivers/comedi/drivers/8255.c +++ b/drivers/comedi/drivers/8255.c @@ -80,7 +80,7 @@ static int dev_8255_attach(struct comedi_device *dev, if (ret) { s->type = COMEDI_SUBD_UNUSED; } else { - ret = subdev_8255_init(dev, s, NULL, iobase); + ret = subdev_8255_io_init(dev, s, iobase); if (ret) { /* * Release the I/O port region here, as the diff --git a/drivers/comedi/drivers/8255_pci.c b/drivers/comedi/drivers/8255_pci.c index 9ad52e9f5427..8498cabe4d91 100644 --- a/drivers/comedi/drivers/8255_pci.c +++ b/drivers/comedi/drivers/8255_pci.c @@ -242,9 +242,9 @@ 
static int pci_8255_auto_attach(struct comedi_device *dev, for (i = 0; i < board->n_8255; i++) { s = &dev->subdevices[i]; if (dev->mmio) - ret = subdev_8255_mm_init(dev, s, NULL, i * I8255_SIZE); + ret = subdev_8255_mm_init(dev, s, i * I8255_SIZE); else - ret = subdev_8255_init(dev, s, NULL, i * I8255_SIZE); + ret = subdev_8255_io_init(dev, s, i * I8255_SIZE); if (ret) return ret; } diff --git a/drivers/comedi/drivers/adv_pci_dio.c b/drivers/comedi/drivers/adv_pci_dio.c index 0319d8c7ee47..ca8054504760 100644 --- a/drivers/comedi/drivers/adv_pci_dio.c +++ b/drivers/comedi/drivers/adv_pci_dio.c @@ -642,8 +642,8 @@ static int pci_dio_auto_attach(struct comedi_device *dev, for (j = 0; j < d->chans; j++) { s = &dev->subdevices[subdev++]; - ret = subdev_8255_init(dev, s, NULL, - d->addr + j * I8255_SIZE); + ret = subdev_8255_io_init(dev, s, + d->addr + j * I8255_SIZE); if (ret) return ret; } diff --git a/drivers/comedi/drivers/aio_aio12_8.c b/drivers/comedi/drivers/aio_aio12_8.c index f9d40fa3d3a9..227a86a3a760 100644 --- a/drivers/comedi/drivers/aio_aio12_8.c +++ b/drivers/comedi/drivers/aio_aio12_8.c @@ -247,7 +247,7 @@ static int aio_aio12_8_attach(struct comedi_device *dev, /* Digital I/O subdevice (8255) */ s = &dev->subdevices[2]; - ret = subdev_8255_init(dev, s, NULL, AIO12_8_8255_BASE_REG); + ret = subdev_8255_io_init(dev, s, AIO12_8_8255_BASE_REG); if (ret) return ret; diff --git a/drivers/comedi/drivers/amplc_pc236_common.c b/drivers/comedi/drivers/amplc_pc236_common.c index 9f4f89b1ef23..326ca72c24ec 100644 --- a/drivers/comedi/drivers/amplc_pc236_common.c +++ b/drivers/comedi/drivers/amplc_pc236_common.c @@ -147,7 +147,7 @@ int amplc_pc236_common_attach(struct comedi_device *dev, unsigned long iobase, s = &dev->subdevices[0]; /* digital i/o subdevice (8255) */ - ret = subdev_8255_init(dev, s, NULL, 0x00); + ret = subdev_8255_io_init(dev, s, 0x00); if (ret) return ret; diff --git a/drivers/comedi/drivers/amplc_pci230.c b/drivers/comedi/drivers/amplc_pci230.c 
index 783da73877b9..c74209c2e83a 100644 --- a/drivers/comedi/drivers/amplc_pci230.c +++ b/drivers/comedi/drivers/amplc_pci230.c @@ -2529,7 +2529,7 @@ static int pci230_auto_attach(struct comedi_device *dev, s = &dev->subdevices[2]; /* digital i/o subdevice */ if (board->have_dio) { - rc = subdev_8255_init(dev, s, NULL, PCI230_PPI_X_BASE); + rc = subdev_8255_io_init(dev, s, PCI230_PPI_X_BASE); if (rc) return rc; } else { diff --git a/drivers/comedi/drivers/cb_pcidas.c b/drivers/comedi/drivers/cb_pcidas.c index 7a6cd681e932..8bb9b0623869 100644 --- a/drivers/comedi/drivers/cb_pcidas.c +++ b/drivers/comedi/drivers/cb_pcidas.c @@ -1352,7 +1352,7 @@ static int cb_pcidas_auto_attach(struct comedi_device *dev, /* 8255 */ s = &dev->subdevices[2]; - ret = subdev_8255_init(dev, s, NULL, PCIDAS_8255_BASE); + ret = subdev_8255_io_init(dev, s, PCIDAS_8255_BASE); if (ret) return ret; diff --git a/drivers/comedi/drivers/cb_pcidas64.c b/drivers/comedi/drivers/cb_pcidas64.c index ca6038a25f26..ff19fc3859e4 100644 --- a/drivers/comedi/drivers/cb_pcidas64.c +++ b/drivers/comedi/drivers/cb_pcidas64.c @@ -3877,11 +3877,10 @@ static int setup_subdevices(struct comedi_device *dev) s = &dev->subdevices[4]; if (board->has_8255) { if (board->layout == LAYOUT_4020) { - ret = subdev_8255_init(dev, s, dio_callback_4020, - I8255_4020_REG); + ret = subdev_8255_cb_init(dev, s, dio_callback_4020, + I8255_4020_REG); } else { - ret = subdev_8255_mm_init(dev, s, NULL, - DIO_8255_OFFSET); + ret = subdev_8255_mm_init(dev, s, DIO_8255_OFFSET); } if (ret) return ret; diff --git a/drivers/comedi/drivers/cb_pcidda.c b/drivers/comedi/drivers/cb_pcidda.c index c52204a6bda4..c353d0f87da9 100644 --- a/drivers/comedi/drivers/cb_pcidda.c +++ b/drivers/comedi/drivers/cb_pcidda.c @@ -365,7 +365,7 @@ static int cb_pcidda_auto_attach(struct comedi_device *dev, /* two 8255 digital io subdevices */ for (i = 0; i < 2; i++) { s = &dev->subdevices[1 + i]; - ret = subdev_8255_init(dev, s, NULL, i * I8255_SIZE); + ret = 
subdev_8255_io_init(dev, s, i * I8255_SIZE); if (ret) return ret; } diff --git a/drivers/comedi/drivers/cb_pcimdas.c b/drivers/comedi/drivers/cb_pcimdas.c index 5816ef65ed5f..641c30df392e 100644 --- a/drivers/comedi/drivers/cb_pcimdas.c +++ b/drivers/comedi/drivers/cb_pcimdas.c @@ -405,7 +405,7 @@ static int cb_pcimdas_auto_attach(struct comedi_device *dev, /* Digital I/O subdevice */ s = &dev->subdevices[2]; - ret = subdev_8255_init(dev, s, NULL, PCIMDAS_8255_BASE); + ret = subdev_8255_io_init(dev, s, PCIMDAS_8255_BASE); if (ret) return ret; diff --git a/drivers/comedi/drivers/cb_pcimdda.c b/drivers/comedi/drivers/cb_pcimdda.c index bf8093a10315..541b5742bb1b 100644 --- a/drivers/comedi/drivers/cb_pcimdda.c +++ b/drivers/comedi/drivers/cb_pcimdda.c @@ -154,7 +154,7 @@ static int cb_pcimdda_auto_attach(struct comedi_device *dev, s = &dev->subdevices[1]; /* digital i/o subdevice */ - return subdev_8255_init(dev, s, NULL, PCIMDDA_8255_BASE_REG); + return subdev_8255_io_init(dev, s, PCIMDDA_8255_BASE_REG); } static struct comedi_driver cb_pcimdda_driver = { diff --git a/drivers/comedi/drivers/comedi_8255.c b/drivers/comedi/drivers/comedi_8255.c index 5562b9cd0a17..28fd9d8c95cc 100644 --- a/drivers/comedi/drivers/comedi_8255.c +++ b/drivers/comedi/drivers/comedi_8255.c @@ -33,9 +33,9 @@ #include struct subdev_8255_private { - unsigned long regbase; + unsigned long context; int (*io)(struct comedi_device *dev, int dir, int port, int data, - unsigned long regbase); + unsigned long context); }; static int subdev_8255_io(struct comedi_device *dev, @@ -64,7 +64,7 @@ static int subdev_8255_insn(struct comedi_device *dev, unsigned int *data) { struct subdev_8255_private *spriv = s->private; - unsigned long regbase = spriv->regbase; + unsigned long context = spriv->context; unsigned int mask; unsigned int v; @@ -72,18 +72,18 @@ static int subdev_8255_insn(struct comedi_device *dev, if (mask) { if (mask & 0xff) spriv->io(dev, 1, I8255_DATA_A_REG, - s->state & 0xff, regbase); + 
s->state & 0xff, context); if (mask & 0xff00) spriv->io(dev, 1, I8255_DATA_B_REG, - (s->state >> 8) & 0xff, regbase); + (s->state >> 8) & 0xff, context); if (mask & 0xff0000) spriv->io(dev, 1, I8255_DATA_C_REG, - (s->state >> 16) & 0xff, regbase); + (s->state >> 16) & 0xff, context); } - v = spriv->io(dev, 0, I8255_DATA_A_REG, 0, regbase); - v |= (spriv->io(dev, 0, I8255_DATA_B_REG, 0, regbase) << 8); - v |= (spriv->io(dev, 0, I8255_DATA_C_REG, 0, regbase) << 16); + v = spriv->io(dev, 0, I8255_DATA_A_REG, 0, context); + v |= (spriv->io(dev, 0, I8255_DATA_B_REG, 0, context) << 8); + v |= (spriv->io(dev, 0, I8255_DATA_C_REG, 0, context) << 16); data[1] = v; @@ -94,7 +94,7 @@ static void subdev_8255_do_config(struct comedi_device *dev, struct comedi_subdevice *s) { struct subdev_8255_private *spriv = s->private; - unsigned long regbase = spriv->regbase; + unsigned long context = spriv->context; int config; config = I8255_CTRL_CW; @@ -108,7 +108,7 @@ static void subdev_8255_do_config(struct comedi_device *dev, if (!(s->io_bits & 0xf00000)) config |= I8255_CTRL_C_HI_IO; - spriv->io(dev, 1, I8255_CTRL_REG, config, regbase); + spriv->io(dev, 1, I8255_CTRL_REG, config, context); } static int subdev_8255_insn_config(struct comedi_device *dev, @@ -142,23 +142,19 @@ static int __subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s, int (*io)(struct comedi_device *dev, int dir, int port, int data, - unsigned long regbase), - unsigned long regbase, - bool is_mmio) + unsigned long context), + unsigned long context) { struct subdev_8255_private *spriv; + if (!io) + return -EINVAL; + spriv = comedi_alloc_spriv(s, sizeof(*spriv)); if (!spriv) return -ENOMEM; - if (io) - spriv->io = io; - else if (is_mmio) - spriv->io = subdev_8255_mmio; - else - spriv->io = subdev_8255_io; - spriv->regbase = regbase; + spriv->context = context; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; @@ -174,88 +170,83 @@ static int __subdev_8255_init(struct 
comedi_device *dev, } /** - * subdev_8255_init - initialize DIO subdevice for driving I/O mapped 8255 + * subdev_8255_io_init - initialize DIO subdevice for driving I/O mapped 8255 * @dev: comedi device owning subdevice * @s: comedi subdevice to initialize - * @io: (optional) register I/O call-back function - * @regbase: offset of 8255 registers from dev->iobase, or call-back context + * @regbase: offset of 8255 registers from dev->iobase * * Initializes a comedi subdevice as a DIO subdevice driving an 8255 chip. * - * If the optional I/O call-back function is provided, its prototype is of - * the following form: - * - * int my_8255_callback(struct comedi_device *dev, int dir, int port, - * int data, unsigned long regbase); - * - * where 'dev', and 'regbase' match the values passed to this function, - * 'port' is the 8255 port number 0 to 3 (including the control port), 'dir' - * is the direction (0 for read, 1 for write) and 'data' is the value to be - * written. It should return 0 if writing or the value read if reading. - * - * If the optional I/O call-back function is not provided, an internal - * call-back function is used which uses consecutive I/O port addresses - * starting at dev->iobase + regbase. - * * Return: -ENOMEM if failed to allocate memory, zero on success. 
*/ -int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), +int subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s, unsigned long regbase) { - return __subdev_8255_init(dev, s, io, regbase, false); + return __subdev_8255_init(dev, s, subdev_8255_io, regbase); } -EXPORT_SYMBOL_GPL(subdev_8255_init); +EXPORT_SYMBOL_GPL(subdev_8255_io_init); /** * subdev_8255_mm_init - initialize DIO subdevice for driving mmio-mapped 8255 * @dev: comedi device owning subdevice * @s: comedi subdevice to initialize - * @io: (optional) register I/O call-back function - * @regbase: offset of 8255 registers from dev->mmio, or call-back context + * @regbase: offset of 8255 registers from dev->mmio * * Initializes a comedi subdevice as a DIO subdevice driving an 8255 chip. * - * If the optional I/O call-back function is provided, its prototype is of - * the following form: + * Return: -ENOMEM if failed to allocate memory, zero on success. + */ +int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, + unsigned long regbase) +{ + return __subdev_8255_init(dev, s, subdev_8255_mmio, regbase); +} +EXPORT_SYMBOL_GPL(subdev_8255_mm_init); + +/** + * subdev_8255_cb_init - initialize DIO subdevice for driving callback-mapped 8255 + * @dev: comedi device owning subdevice + * @s: comedi subdevice to initialize + * @io: register I/O call-back function + * @context: call-back context + * + * Initializes a comedi subdevice as a DIO subdevice driving an 8255 chip. 
+ * + * The prototype of the I/O call-back function is of the following form: * * int my_8255_callback(struct comedi_device *dev, int dir, int port, - * int data, unsigned long regbase); + * int data, unsigned long context); * - * where 'dev', and 'regbase' match the values passed to this function, + * where 'dev', and 'context' match the values passed to this function, * 'port' is the 8255 port number 0 to 3 (including the control port), 'dir' * is the direction (0 for read, 1 for write) and 'data' is the value to be * written. It should return 0 if writing or the value read if reading. * - * If the optional I/O call-back function is not provided, an internal - * call-back function is used which uses consecutive MMIO virtual addresses - * starting at dev->mmio + regbase. * * Return: -ENOMEM if failed to allocate memory, zero on success. */ -int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, +int subdev_8255_cb_init(struct comedi_device *dev, struct comedi_subdevice *s, int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), - unsigned long regbase) + int data, unsigned long context), + unsigned long context) { - return __subdev_8255_init(dev, s, io, regbase, true); + return __subdev_8255_init(dev, s, io, context); } -EXPORT_SYMBOL_GPL(subdev_8255_mm_init); +EXPORT_SYMBOL_GPL(subdev_8255_cb_init); /** * subdev_8255_regbase - get offset of 8255 registers or call-back context * @s: comedi subdevice * - * Returns the 'regbase' parameter that was previously passed to - * subdev_8255_init() or subdev_8255_mm_init() to set up the subdevice. - * Only valid if the subdevice was set up successfully. + * Returns the 'regbase' or 'context' parameter that was previously passed to + * subdev_8255_io_init(), subdev_8255_mm_init(), or subdev_8255_cb_init() to + * set up the subdevice. Only valid if the subdevice was set up successfully. 
*/ unsigned long subdev_8255_regbase(struct comedi_subdevice *s) { struct subdev_8255_private *spriv = s->private; - return spriv->regbase; + return spriv->context; } EXPORT_SYMBOL_GPL(subdev_8255_regbase); diff --git a/drivers/comedi/drivers/daqboard2000.c b/drivers/comedi/drivers/daqboard2000.c index c0a4e1b06fb3..897bf46b95ee 100644 --- a/drivers/comedi/drivers/daqboard2000.c +++ b/drivers/comedi/drivers/daqboard2000.c @@ -738,8 +738,8 @@ static int db2k_auto_attach(struct comedi_device *dev, unsigned long context) return result; s = &dev->subdevices[2]; - return subdev_8255_init(dev, s, db2k_8255_cb, - DB2K_REG_DIO_P2_EXP_IO_8_BIT); + return subdev_8255_cb_init(dev, s, db2k_8255_cb, + DB2K_REG_DIO_P2_EXP_IO_8_BIT); } static void db2k_detach(struct comedi_device *dev) diff --git a/drivers/comedi/drivers/das08.c b/drivers/comedi/drivers/das08.c index 6a3b5411aa90..5d5b9174f88a 100644 --- a/drivers/comedi/drivers/das08.c +++ b/drivers/comedi/drivers/das08.c @@ -429,7 +429,7 @@ int das08_common_attach(struct comedi_device *dev, unsigned long iobase) s = &dev->subdevices[4]; /* 8255 */ if (board->i8255_offset != 0) { - ret = subdev_8255_init(dev, s, NULL, board->i8255_offset); + ret = subdev_8255_io_init(dev, s, board->i8255_offset); if (ret) return ret; } else { diff --git a/drivers/comedi/drivers/das16.c b/drivers/comedi/drivers/das16.c index bfe8811be1b5..4ed56a02150e 100644 --- a/drivers/comedi/drivers/das16.c +++ b/drivers/comedi/drivers/das16.c @@ -1145,7 +1145,7 @@ static int das16_attach(struct comedi_device *dev, struct comedi_devconfig *it) /* 8255 Digital I/O subdevice */ if (board->has_8255) { s = &dev->subdevices[4]; - ret = subdev_8255_init(dev, s, NULL, board->i8255_offset); + ret = subdev_8255_io_init(dev, s, board->i8255_offset); if (ret) return ret; } diff --git a/drivers/comedi/drivers/das16m1.c b/drivers/comedi/drivers/das16m1.c index ff9c5a8897bd..b8ea737ad3d1 100644 --- a/drivers/comedi/drivers/das16m1.c +++ b/drivers/comedi/drivers/das16m1.c 
@@ -583,7 +583,7 @@ static int das16m1_attach(struct comedi_device *dev, /* Digital I/O subdevice (8255) */ s = &dev->subdevices[3]; - ret = subdev_8255_init(dev, s, NULL, DAS16M1_8255_IOBASE); + ret = subdev_8255_io_init(dev, s, DAS16M1_8255_IOBASE); if (ret) return ret; diff --git a/drivers/comedi/drivers/dmm32at.c b/drivers/comedi/drivers/dmm32at.c index fe023c722aa3..644e3b643c79 100644 --- a/drivers/comedi/drivers/dmm32at.c +++ b/drivers/comedi/drivers/dmm32at.c @@ -599,7 +599,8 @@ static int dmm32at_attach(struct comedi_device *dev, /* Digital I/O subdevice */ s = &dev->subdevices[2]; - return subdev_8255_init(dev, s, dmm32at_8255_io, DMM32AT_8255_IOBASE); + return subdev_8255_cb_init(dev, s, dmm32at_8255_io, + DMM32AT_8255_IOBASE); } static struct comedi_driver dmm32at_driver = { diff --git a/drivers/comedi/drivers/ni_atmio16d.c b/drivers/comedi/drivers/ni_atmio16d.c index 9fa902529a8e..e5e7cc423c87 100644 --- a/drivers/comedi/drivers/ni_atmio16d.c +++ b/drivers/comedi/drivers/ni_atmio16d.c @@ -677,7 +677,7 @@ static int atmio16d_attach(struct comedi_device *dev, /* 8255 subdevice */ s = &dev->subdevices[3]; if (board->has_8255) { - ret = subdev_8255_init(dev, s, NULL, 0x00); + ret = subdev_8255_io_init(dev, s, 0x00); if (ret) return ret; } else { diff --git a/drivers/comedi/drivers/ni_daq_dio24.c b/drivers/comedi/drivers/ni_daq_dio24.c index 487733111023..9419caf02edc 100644 --- a/drivers/comedi/drivers/ni_daq_dio24.c +++ b/drivers/comedi/drivers/ni_daq_dio24.c @@ -45,7 +45,7 @@ static int dio24_auto_attach(struct comedi_device *dev, /* 8255 dio */ s = &dev->subdevices[0]; - return subdev_8255_init(dev, s, NULL, 0x00); + return subdev_8255_io_init(dev, s, 0x00); } static struct comedi_driver driver_dio24 = { diff --git a/drivers/comedi/drivers/ni_labpc_common.c b/drivers/comedi/drivers/ni_labpc_common.c index eb8f6431276a..5d5c1d0e9cb6 100644 --- a/drivers/comedi/drivers/ni_labpc_common.c +++ b/drivers/comedi/drivers/ni_labpc_common.c @@ -1287,9 +1287,9 @@ 
int labpc_common_attach(struct comedi_device *dev, /* 8255 dio */ s = &dev->subdevices[2]; if (dev->mmio) - ret = subdev_8255_mm_init(dev, s, NULL, DIO_BASE_REG); + ret = subdev_8255_mm_init(dev, s, DIO_BASE_REG); else - ret = subdev_8255_init(dev, s, NULL, DIO_BASE_REG); + ret = subdev_8255_io_init(dev, s, DIO_BASE_REG); if (ret) return ret; diff --git a/drivers/comedi/drivers/ni_mio_common.c b/drivers/comedi/drivers/ni_mio_common.c index d39998565808..638be08b43e4 100644 --- a/drivers/comedi/drivers/ni_mio_common.c +++ b/drivers/comedi/drivers/ni_mio_common.c @@ -6137,8 +6137,8 @@ static int ni_E_init(struct comedi_device *dev, /* 8255 device */ s = &dev->subdevices[NI_8255_DIO_SUBDEV]; if (board->has_8255) { - ret = subdev_8255_init(dev, s, ni_8255_callback, - NI_E_8255_BASE); + ret = subdev_8255_cb_init(dev, s, ni_8255_callback, + NI_E_8255_BASE); if (ret) return ret; } else { diff --git a/drivers/comedi/drivers/pcl724.c b/drivers/comedi/drivers/pcl724.c index 948a0576c9ef..00474710b81f 100644 --- a/drivers/comedi/drivers/pcl724.c +++ b/drivers/comedi/drivers/pcl724.c @@ -124,10 +124,10 @@ static int pcl724_attach(struct comedi_device *dev, s = &dev->subdevices[i]; if (board->is_pet48) { iobase = dev->iobase + (i * 0x1000); - ret = subdev_8255_init(dev, s, pcl724_8255mapped_io, - iobase); + ret = subdev_8255_cb_init(dev, s, pcl724_8255mapped_io, + iobase); } else { - ret = subdev_8255_init(dev, s, NULL, i * I8255_SIZE); + ret = subdev_8255_io_init(dev, s, i * I8255_SIZE); } if (ret) return ret; diff --git a/drivers/comedi/drivers/pcm3724.c b/drivers/comedi/drivers/pcm3724.c index ca8bef54dacc..fb41de3baef8 100644 --- a/drivers/comedi/drivers/pcm3724.c +++ b/drivers/comedi/drivers/pcm3724.c @@ -204,7 +204,7 @@ static int pcm3724_attach(struct comedi_device *dev, for (i = 0; i < dev->n_subdevices; i++) { s = &dev->subdevices[i]; - ret = subdev_8255_init(dev, s, NULL, i * I8255_SIZE); + ret = subdev_8255_io_init(dev, s, i * I8255_SIZE); if (ret) return ret; 
s->insn_config = subdev_3724_insn_config; diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h index b2a5bc6b3a49..b396fcfbf8b0 100644 --- a/include/linux/comedi/comedi_8255.h +++ b/include/linux/comedi/comedi_8255.h @@ -27,16 +27,17 @@ struct comedi_device; struct comedi_subdevice; -int subdev_8255_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), - unsigned long regbase); +int subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s, + unsigned long regbase); int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, - int (*io)(struct comedi_device *dev, int dir, int port, - int data, unsigned long regbase), unsigned long regbase); +int subdev_8255_cb_init(struct comedi_device *dev, struct comedi_subdevice *s, + int (*io)(struct comedi_device *dev, int dir, int port, + int data, unsigned long context), + unsigned long context); + unsigned long subdev_8255_regbase(struct comedi_subdevice *s); #endif -- cgit v1.2.3 From 7187a0939a1773e6a2663a02a6c456047a5e6289 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 13 Sep 2023 18:07:06 +0100 Subject: comedi: comedi_8255: Conditionally remove I/O port support In a future patch, the port I/O functions (`inb()`, `outb()`, and friends) will only be declared if the `HAS_IOPORT` configuration option is enabled. The comedi_8255 module supports both port I/O and memory-mapped I/O. Conditionally compile the parts of the module that use port I/O if and only if the `CONFIG_HAS_IOPORT` macro is defined so that it can still be built if the port I/O functions have not been declared. If the `CONFIG_HAS_IOPORT` macro is undefined, replace the GPL-exported `subdev_8255_io_init()` function with a dummy static inline version that just returns `-ENXIO`.
Cc: Arnd Bergmann Cc: Niklas Schnelle Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230913170712.111719-8-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/comedi_8255.c | 8 ++++++++ include/linux/comedi/comedi_8255.h | 11 +++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/comedi/drivers/comedi_8255.c b/drivers/comedi/drivers/comedi_8255.c index 28fd9d8c95cc..e4974b508328 100644 --- a/drivers/comedi/drivers/comedi_8255.c +++ b/drivers/comedi/drivers/comedi_8255.c @@ -38,6 +38,8 @@ struct subdev_8255_private { unsigned long context); }; +#ifdef CONFIG_HAS_IOPORT + static int subdev_8255_io(struct comedi_device *dev, int dir, int port, int data, unsigned long regbase) { @@ -48,6 +50,8 @@ static int subdev_8255_io(struct comedi_device *dev, return inb(dev->iobase + regbase + port); } +#endif /* CONFIG_HAS_IOPORT */ + static int subdev_8255_mmio(struct comedi_device *dev, int dir, int port, int data, unsigned long regbase) { @@ -169,6 +173,8 @@ static int __subdev_8255_init(struct comedi_device *dev, return 0; } +#ifdef CONFIG_HAS_IOPORT + /** * subdev_8255_io_init - initialize DIO subdevice for driving I/O mapped 8255 * @dev: comedi device owning subdevice @@ -186,6 +192,8 @@ int subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s, } EXPORT_SYMBOL_GPL(subdev_8255_io_init); +#endif /* CONFIG_HAS_IOPORT */ + /** * subdev_8255_mm_init - initialize DIO subdevice for driving mmio-mapped 8255 * @dev: comedi device owning subdevice diff --git a/include/linux/comedi/comedi_8255.h b/include/linux/comedi/comedi_8255.h index b396fcfbf8b0..d24a69da389b 100644 --- a/include/linux/comedi/comedi_8255.h +++ b/include/linux/comedi/comedi_8255.h @@ -10,6 +10,8 @@ #ifndef _COMEDI_8255_H #define _COMEDI_8255_H +#include + #define I8255_SIZE 0x04 #define I8255_DATA_A_REG 0x00 @@ -27,8 +29,17 @@ struct comedi_device; struct comedi_subdevice; +#ifdef CONFIG_HAS_IOPORT int 
subdev_8255_io_init(struct comedi_device *dev, struct comedi_subdevice *s, unsigned long regbase); +#else +static inline int subdev_8255_io_init(struct comedi_device *dev, + struct comedi_subdevice *s, + unsigned long regbase) +{ + return -ENXIO; +} +#endif int subdev_8255_mm_init(struct comedi_device *dev, struct comedi_subdevice *s, unsigned long regbase); -- cgit v1.2.3 From 77f048bcbf07f7dc961f3b2b7815038b5405ec60 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 30 Sep 2023 11:14:47 +0200 Subject: comedi: Annotate struct comedi_lrange with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). Signed-off-by: Christophe JAILLET Reviewed-by: Kees Cook Reviewed-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/5c3b7459b820e22e2ac6ce892d4aadcc119cc919.1696065263.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- include/linux/comedi/comedidev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h index 0a1150900ef3..c08416a7364b 100644 --- a/include/linux/comedi/comedidev.h +++ b/include/linux/comedi/comedidev.h @@ -633,7 +633,7 @@ extern const struct comedi_lrange range_unknown; */ struct comedi_lrange { int length; - struct comedi_krange range[]; + struct comedi_krange range[] __counted_by(length); }; /** -- cgit v1.2.3 From 0fedefd4c4e33dd24f726b13b5d7c143e2b483be Mon Sep 17 00:00:00 2001 From: Valentine Sinitsyn Date: Mon, 25 Sep 2023 11:40:12 +0300 Subject: kernfs: sysfs: support custom llseek method for sysfs entries As of now, seeking in sysfs files is handled by generic_file_llseek(). 
There are situations where one may want to customize seeking logic: - Many sysfs entries are fixed files while generic_file_llseek() accepts past-the-end positions. Not only being useless by itself, this also means a bug in userspace code will trigger not at lseek(), but at some later point making debugging harder. - generic_file_llseek() relies on f_mapping->host to get the file size which might not be correct for all sysfs entries. See commit 636b21b50152 ("PCI: Revoke mappings like devmem") as an example. Implement llseek method to override this behavior at sysfs attribute level. The method is optional, and if it is absent, generic_file_llseek() is called to preserve backwards compatibility. Signed-off-by: Valentine Sinitsyn Link: https://lore.kernel.org/r/20230925084013.309399-1-valesini@yandex-team.ru Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 29 ++++++++++++++++++++++++++++- fs/sysfs/file.c | 13 +++++++++++++ include/linux/kernfs.h | 1 + include/linux/sysfs.h | 2 ++ 4 files changed, 44 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 180906c36f51..855e3f9d8dcc 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -903,6 +903,33 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) return ret; } +static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence) +{ + struct kernfs_open_file *of = kernfs_of(file); + const struct kernfs_ops *ops; + loff_t ret; + + /* + * @of->mutex nests outside active ref and is primarily to ensure that + * the ops aren't called concurrently for the same open file. 
+ */ + mutex_lock(&of->mutex); + if (!kernfs_get_active(of->kn)) { + mutex_unlock(&of->mutex); + return -ENODEV; + } + + ops = kernfs_ops(of->kn); + if (ops->llseek) + ret = ops->llseek(of, offset, whence); + else + ret = generic_file_llseek(file, offset, whence); + + kernfs_put_active(of->kn); + mutex_unlock(&of->mutex); + return ret; +} + static void kernfs_notify_workfn(struct work_struct *work) { struct kernfs_node *kn; @@ -1005,7 +1032,7 @@ EXPORT_SYMBOL_GPL(kernfs_notify); const struct file_operations kernfs_file_fops = { .read_iter = kernfs_fop_read_iter, .write_iter = kernfs_fop_write_iter, - .llseek = generic_file_llseek, + .llseek = kernfs_fop_llseek, .mmap = kernfs_fop_mmap, .open = kernfs_fop_open, .release = kernfs_fop_release, diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index a12ac0356c69..6b7652fb8050 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -167,6 +167,18 @@ static int sysfs_kf_bin_mmap(struct kernfs_open_file *of, return battr->mmap(of->file, kobj, battr, vma); } +static loff_t sysfs_kf_bin_llseek(struct kernfs_open_file *of, loff_t offset, + int whence) +{ + struct bin_attribute *battr = of->kn->priv; + struct kobject *kobj = of->kn->parent->priv; + + if (battr->llseek) + return battr->llseek(of->file, kobj, battr, offset, whence); + else + return generic_file_llseek(of->file, offset, whence); +} + static int sysfs_kf_bin_open(struct kernfs_open_file *of) { struct bin_attribute *battr = of->kn->priv; @@ -249,6 +261,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = { .write = sysfs_kf_bin_write, .mmap = sysfs_kf_bin_mmap, .open = sysfs_kf_bin_open, + .llseek = sysfs_kf_bin_llseek, }; int sysfs_add_file_mode_ns(struct kernfs_node *parent, diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 2a36f3218b51..99aaa050ccb7 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -316,6 +316,7 @@ struct kernfs_ops { struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct 
vm_area_struct *vma); + loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence); }; /* diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index fd3fe5c8c17f..b717a70219f6 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -181,6 +181,8 @@ struct bin_attribute { char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); + loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *, + loff_t, int); int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; -- cgit v1.2.3 From eb7581deb4c2eef77f6368e1891e123b69349bb0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Sep 2023 19:53:12 +0300 Subject: resource: Constify resource crosscheck APIs Constify APIs: _contains(), _overlaps(), _intersection(), _union(). Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230912165312.402422-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/ioport.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 25d768d48970..14f5cfabbbc8 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -229,7 +229,7 @@ static inline unsigned long resource_ext_type(const struct resource *res) return res->flags & IORESOURCE_EXT_TYPE_BITS; } /* True iff r1 completely contains r2 */ -static inline bool resource_contains(struct resource *r1, struct resource *r2) +static inline bool resource_contains(const struct resource *r1, const struct resource *r2) { if (resource_type(r1) != resource_type(r2)) return false; @@ -239,13 +239,13 @@ static inline bool resource_contains(struct resource *r1, struct resource *r2) } /* True if any part of r1 overlaps r2 */ -static inline bool resource_overlaps(struct resource *r1, struct resource *r2) +static inline bool 
resource_overlaps(const struct resource *r1, const struct resource *r2) { return r1->start <= r2->end && r1->end >= r2->start; } -static inline bool -resource_intersection(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_intersection(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; @@ -254,8 +254,8 @@ resource_intersection(struct resource *r1, struct resource *r2, struct resource return true; } -static inline bool -resource_union(struct resource *r1, struct resource *r2, struct resource *r) +static inline bool resource_union(const struct resource *r1, const struct resource *r2, + struct resource *r) { if (!resource_overlaps(r1, r2)) return false; -- cgit v1.2.3 From 8a76356e7db02ec7b1913db06605e70294d94672 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 27 Sep 2023 11:27:41 +0300 Subject: iio: improve doc for available_scan_mask The available_scan_mask is an array of bitmaps representing the channels which can be simultaneously enabled by the driver. In many cases, the hardware can offer more channels than what the user is interested in obtaining. In such cases, it may be preferred that only a subset of channels are enabled, and the driver reads only a subset of the channels from the hardware. Some devices can't support all channel combinations. For example, the BM1390 pressure sensor must always read the pressure data in order to acknowledge the watermark IRQ, while reading temperature can be omitted. So, the available scan masks would be 'pressure and temperature' and 'pressure only'. When IIO searches for the scan mask it asks the driver to use, it will pick the first suitable one from the 'available_scan_mask' array. Hence, ordering the masks in the array makes a difference. 
We should 'prefer' reading just the pressure from the hardware (as it is a cheaper operation than reading both pressure and temperature) over reading both pressure and temperature. Hence, we should set the 'only pressure' as the first scan mask in available_scan_mask array. If we set the 'pressure and temperature' as first in the array, then the 'only pressure' will never get used as 'pressure and temperature' can always serve the user's needs. Add (minimal) kerneldoc to the 'available_scan_mask' to hint the user that the ordering of masks matters. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/4e43bf0186df5c8a56b470318b4827605f9cad6c.1695727471.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 202e55b0a28b..7bfa1b9bc8a2 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -556,7 +556,9 @@ struct iio_buffer_setup_ops { * and owner * @buffer: [DRIVER] any buffer present * @scan_bytes: [INTERN] num bytes captured to be fed to buffer demux - * @available_scan_masks: [DRIVER] optional array of allowed bitmasks + * @available_scan_masks: [DRIVER] optional array of allowed bitmasks. Sort the + * array in order of preference, the most preferred + * masks first. * @masklength: [INTERN] the length of the mask established from * channels * @active_scan_mask: [INTERN] union of all scan masks requested by buffers -- cgit v1.2.3 From b3a4dbc89d4021b3f90ff6a13537111a004f9d07 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 4 Oct 2023 20:05:31 -0400 Subject: io_uring/kbuf: Use slab for struct io_buffer objects The allocation of struct io_buffer for metadata of provided buffers is done through a custom allocator that directly gets pages and fragments them.
But, slab would do just fine, as this is not a hot path (in fact, it is a deprecated feature) and, by keeping a custom allocator implementation we lose benefits like tracking, poisoning, sanitizers. Finally, the custom code is more complex and requires keeping the list of pages in struct ctx for no good reason. This patch cleans this path up and just uses slab. I microbenchmarked it by forcing the allocation of a large number of objects with the least number of io_uring commands possible (keeping nbufs=USHRT_MAX), with and without the patch. There is a slight increase in time spent in the allocation with slab, of course, but even when allocating to system resources exhaustion, which is not very realistic and happened around 1/2 billion provided buffers for me, it wasn't a significant hit in system time. Especially if we think of a real-world scenario, an application doing register/unregister of provided buffers will hit ctx->io_buffers_cache more often than actually going to slab. Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20231005000531.30800-4-krisman@suse.de Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 -- io_uring/io_uring.c | 4 +++- io_uring/io_uring.h | 1 + io_uring/kbuf.c | 47 +++++++++++++++++++++++------------------- 4 files changed, 30 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e178461fa513..e4e67899b134 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -350,8 +350,6 @@ struct io_ring_ctx { struct wait_queue_head rsrc_quiesce_wq; unsigned rsrc_quiesce; - struct list_head io_buffers_pages; - #if defined(CONFIG_UNIX) struct socket *ring_sock; #endif diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 08c9ea46bb95..b9e1af5772f3 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -339,7 +339,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct
io_uring_params *p) spin_lock_init(&ctx->completion_lock); spin_lock_init(&ctx->timeout_lock); INIT_WQ_LIST(&ctx->iopoll_list); - INIT_LIST_HEAD(&ctx->io_buffers_pages); INIT_LIST_HEAD(&ctx->io_buffers_comp); INIT_LIST_HEAD(&ctx->defer_list); INIT_LIST_HEAD(&ctx->timeout_list); @@ -4720,6 +4719,9 @@ static int __init io_uring_init(void) SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU, offsetof(struct io_kiocb, cmd.data), sizeof_field(struct io_kiocb, cmd.data), NULL); + io_buf_cachep = kmem_cache_create("io_buffer", sizeof(struct io_buffer), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, + NULL); #ifdef CONFIG_SYSCTL register_sysctl_init("kernel", kernel_io_uring_disabled_table); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 547c30582fb8..2ff719ae1b57 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -330,6 +330,7 @@ static inline bool io_req_cache_empty(struct io_ring_ctx *ctx) } extern struct kmem_cache *req_cachep; +extern struct kmem_cache *io_buf_cachep; static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx) { diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 12a357348733..d5a04467666f 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -22,6 +22,8 @@ /* BIDs are addressed by a 16-bit field in a CQE */ #define MAX_BIDS_PER_BGID (1 << 16) +struct kmem_cache *io_buf_cachep; + struct io_provide_buf { struct file *file; __u64 addr; @@ -258,6 +260,8 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, void io_destroy_buffers(struct io_ring_ctx *ctx) { struct io_buffer_list *bl; + struct list_head *item, *tmp; + struct io_buffer *buf; unsigned long index; int i; @@ -273,12 +277,9 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) kfree(bl); } - while (!list_empty(&ctx->io_buffers_pages)) { - struct page *page; - - page = list_first_entry(&ctx->io_buffers_pages, struct page, lru); - list_del_init(&page->lru); - __free_page(page); + list_for_each_safe(item, tmp, &ctx->io_buffers_cache) { + buf = list_entry(item, struct 
io_buffer, list); + kmem_cache_free(io_buf_cachep, buf); } } @@ -361,11 +362,12 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return 0; } +#define IO_BUFFER_ALLOC_BATCH 64 + static int io_refill_buffer_cache(struct io_ring_ctx *ctx) { - struct io_buffer *buf; - struct page *page; - int bufs_in_page; + struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH]; + int allocated; /* * Completions that don't happen inline (eg not under uring_lock) will @@ -385,22 +387,25 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx) /* * No free buffers and no completion entries either. Allocate a new - * page worth of buffer entries and add those to our freelist. + * batch of buffer entries and add those to our freelist. */ - page = alloc_page(GFP_KERNEL_ACCOUNT); - if (!page) - return -ENOMEM; - list_add(&page->lru, &ctx->io_buffers_pages); - - buf = page_address(page); - bufs_in_page = PAGE_SIZE / sizeof(*buf); - while (bufs_in_page) { - list_add_tail(&buf->list, &ctx->io_buffers_cache); - buf++; - bufs_in_page--; + allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT, + ARRAY_SIZE(bufs), (void **) bufs); + if (unlikely(!allocated)) { + /* + * Bulk alloc is all-or-nothing. If we fail to get a batch, + * retry single alloc to be on the safe side. + */ + bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL); + if (!bufs[0]) + return -ENOMEM; + allocated = 1; } + while (allocated) + list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache); + return 0; } -- cgit v1.2.3 From 2819f23ac12ce93ff79ca7a54597df9a4a1f6331 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 5 Oct 2023 09:13:48 -0400 Subject: eventfs: Use eventfs_remove_events_dir() The update to removing the eventfs_file changed the way the events top level directory was handled. Instead of returning a dentry, it now returns the eventfs_inode. 
In this change, removing the events top level directory is not much different than removing any of the other directories. Because of this, the removal just called eventfs_remove_dir() instead of eventfs_remove_events_dir(). Although eventfs_remove_dir() does the clean up, it misses out on the dget() of the ei->dentry done in eventfs_create_events_dir(). It makes more sense to match eventfs_create_events_dir() with a specific function eventfs_remove_events_dir() and this specific function can then perform the dput() to the dentry that had the dget() when it was created. Fixes: 5790b1fb3d67 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310051743.y9EobbUr-lkp@intel.com/ Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 19 +++++++------------ include/linux/tracefs.h | 1 + kernel/trace/trace_events.c | 2 +- 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index eab18b157ef5..1ccd100bc565 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -901,22 +901,17 @@ void eventfs_remove_dir(struct eventfs_inode *ei) } /** - * eventfs_remove_events_dir - remove eventfs dir or file from list - * @dentry: events's dentry to be removed. + * eventfs_remove_events_dir - remove the top level eventfs directory + * @ei: the event_inode returned by eventfs_create_events_dir().
* - * This function remove events main directory + * This function removes the events main directory */ -void eventfs_remove_events_dir(struct dentry *dentry) +void eventfs_remove_events_dir(struct eventfs_inode *ei) { - struct tracefs_inode *ti; - - if (!dentry || !dentry->d_inode) - return; + struct dentry *dentry = ei->dentry; - ti = get_tracefs(dentry->d_inode); - if (!ti || !(ti->flags & TRACEFS_EVENT_INODE)) - return; + eventfs_remove_dir(ei); - d_invalidate(dentry); + /* Matches the dget() from eventfs_create_events_dir() */ dput(dentry); } diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 0c39704455d9..13359b1a35d1 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -41,6 +41,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode const struct eventfs_entry *entries, int size, void *data); +void eventfs_remove_events_dir(struct eventfs_inode *ei); void eventfs_remove_dir(struct eventfs_inode *ei); struct dentry *tracefs_create_file(const char *name, umode_t mode, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a3b9d9423824..0e3a1c70e410 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3872,7 +3872,7 @@ int event_trace_del_tracer(struct trace_array *tr) down_write(&trace_event_sem); __trace_remove_event_dirs(tr); - eventfs_remove_dir(tr->event_dir); + eventfs_remove_events_dir(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; -- cgit v1.2.3 From 7e6f3b6d2c352b5fde37ce3fed83bdf6172eebd4 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Wed, 27 Sep 2023 13:22:12 -0700 Subject: PCI: Prevent xHCI driver from claiming AMD VanGogh USB3 DRD device The AMD VanGogh SoC contains a DesignWare USB3 Dual-Role Device that can be operated as either a USB Host or a USB Device, similar to on the AMD Nolan platform. 
be6646bfbaec ("PCI: Prevent xHCI driver from claiming AMD Nolan USB3 DRD device") added a quirk to let the dwc3 driver claim the Nolan device since it provides more specific support. Extend that quirk to include the VanGogh SoC USB3 device. Link: https://lore.kernel.org/r/20230927202212.2388216-1-vi@endrift.com Signed-off-by: Vicki Pfau [bhelgaas: include be6646bfbaec reference, add stable tag] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v3.19+ --- drivers/pci/quirks.c | 8 +++++--- include/linux/pci_ids.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index eeec1d6f9023..e3e915329510 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -690,7 +690,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS100, quirk_ati_ /* * In the AMD NL platform, this device ([1022:7912]) has a class code of * PCI_CLASS_SERIAL_USB_XHCI (0x0c0330), which means the xhci driver will - * claim it. + * claim it. The same applies on the VanGogh platform device ([1022:163a]). * * But the dwc3 driver is a more specific driver for this device, and we'd * prefer to use it instead of xhci. To prevent xhci from claiming the @@ -698,7 +698,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS100, quirk_ati_ * defines as "USB device (not host controller)". The dwc3 driver can then * claim it based on its Vendor and Device ID. 
*/ -static void quirk_amd_nl_class(struct pci_dev *pdev) +static void quirk_amd_dwc_class(struct pci_dev *pdev) { u32 class = pdev->class; @@ -708,7 +708,9 @@ static void quirk_amd_nl_class(struct pci_dev *pdev) class, pdev->class); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB, - quirk_amd_nl_class); + quirk_amd_dwc_class); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VANGOGH_USB, + quirk_amd_dwc_class); /* * Synopsys USB 3.x host HAPS platform has a class code of diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5fb3d4c393a9..3a8e24e9a93f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -579,6 +579,7 @@ #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 +#define PCI_DEVICE_ID_AMD_VANGOGH_USB 0x163a #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From 5b9ceb63c49b9934cf2ec70b3b76951927e50a24 Mon Sep 17 00:00:00 2001 From: John Sanpe Date: Fri, 15 Sep 2023 22:06:50 +0800 Subject: logic_pio: Remove logic_outb(), _outw(), outl() duplicate declarations Remove duplicate declarations of logic_out* functions. 
Link: https://lore.kernel.org/r/20230915140650.3562504-1-sanpeqf@gmail.com Signed-off-by: John Sanpe Signed-off-by: Bjorn Helgaas --- include/linux/logic_pio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index 54945aa824b4..babf4e3c28ba 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -39,9 +39,6 @@ struct logic_pio_host_ops { #ifdef CONFIG_INDIRECT_PIO u8 logic_inb(unsigned long addr); -void logic_outb(u8 value, unsigned long addr); -void logic_outw(u16 value, unsigned long addr); -void logic_outl(u32 value, unsigned long addr); u16 logic_inw(unsigned long addr); u32 logic_inl(unsigned long addr); void logic_outb(u8 value, unsigned long addr); -- cgit v1.2.3 From 7151d87a175c6618fe81705755eb3dc4199cad4e Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 27 Sep 2023 10:31:30 +0200 Subject: virtchnl: Add header dependencies The linux/avf/virtchnl.h header uses BIT, struct_size and ETH_ALEN macros but does not include the appropriate header files that define them. Add these dependencies so this header file can be included anywhere. 
Signed-off-by: Ivan Vecera Reviewed-by: Przemek Kitszel Reviewed-by: Jesse Brandeburg Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- include/linux/avf/virtchnl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index dd71d3009771..6b3acf15be5c 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -4,6 +4,10 @@ #ifndef _VIRTCHNL_H_ #define _VIRTCHNL_H_ +#include <linux/bitops.h> +#include <linux/overflow.h> +#include <uapi/linux/if_ether.h> + /* Description: * This header file describes the Virtual Function (VF) - Physical Function * (PF) communication protocol used by the drivers for all devices starting -- cgit v1.2.3 From 9beebc2b5d0038a65977a7a14909598c64ce070f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 28 Sep 2023 09:24:28 +0200 Subject: can: dev: add can_state_get_by_berr_counter() to return the CAN state based on the current error counters Some CAN controllers do not have a register that contains the current CAN state, but only a register that contains the error counters. Introduce a new function can_state_get_by_berr_counter() that returns the current TX and RX state depending on the provided CAN bit error counters. 
Link: https://lore.kernel.org/all/20231005-at91_can-rx_offload-v2-1-9987d53600e0@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/dev.c | 22 ++++++++++++++++++++++ include/linux/can/dev.h | 4 ++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 82b12902fc35..3a3be5cdfc1f 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -90,6 +90,28 @@ const char *can_get_state_str(const enum can_state state) } EXPORT_SYMBOL_GPL(can_get_state_str); +static enum can_state can_state_err_to_state(u16 err) +{ + if (err < CAN_ERROR_WARNING_THRESHOLD) + return CAN_STATE_ERROR_ACTIVE; + if (err < CAN_ERROR_PASSIVE_THRESHOLD) + return CAN_STATE_ERROR_WARNING; + if (err < CAN_BUS_OFF_THRESHOLD) + return CAN_STATE_ERROR_PASSIVE; + + return CAN_STATE_BUS_OFF; +} + +void can_state_get_by_berr_counter(const struct net_device *dev, + const struct can_berr_counter *bec, + enum can_state *tx_state, + enum can_state *rx_state) +{ + *tx_state = can_state_err_to_state(bec->txerr); + *rx_state = can_state_err_to_state(bec->rxerr); +} +EXPORT_SYMBOL_GPL(can_state_get_by_berr_counter); + void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state) { diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 982ba245eb41..1b92aed49363 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -195,6 +195,10 @@ int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); const char *can_get_state_str(const enum can_state state); +void can_state_get_by_berr_counter(const struct net_device *dev, + const struct can_berr_counter *bec, + enum can_state *tx_state, + enum can_state *rx_state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); -- cgit v1.2.3 From 9c8c3fa3a52bc55696ccc4dfcb8a49f969b5fb0e Mon Sep 
17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 5 Oct 2023 16:21:36 +0900 Subject: bpf: Fix the comment for bpf_restore_data_end() The comment used to say: > Restore data saved by bpf_compute_data_pointers(). But bpf_compute_data_pointers() does not save the data; bpf_compute_and_save_data_end() does. Signed-off-by: Akihiko Odaki Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231005072137.29870-1-akihiko.odaki@daynix.com Signed-off-by: Martin KaFai Lau --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 27406aee2d40..ff7ecc89d3dd 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -736,7 +736,7 @@ static inline void bpf_compute_and_save_data_end( cb->data_end = skb->data + skb_headlen(skb); } -/* Restore data saved by bpf_compute_data_pointers(). */ +/* Restore data saved by bpf_compute_and_save_data_end(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { -- cgit v1.2.3 From 3fbc5c3b8522d655cf91d32c158261060fdc02fe Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 25 Sep 2023 15:17:07 +0200 Subject: PM: domains: Introduce dev_pm_domain_set_performance_state() The generic PM domain is currently the only PM domain variant that supports performance scaling. To allow performance scaling to be supported through a common interface, let's add an optional callback ->set_performance_state(), in the struct dev_pm_domain. Moreover, let's add a function, dev_pm_domain_set_performance_state(), that may be called by consumers to request a new performance state for a device through its PM domain. Note that, in most cases it's preferred that a consumer use the OPP library to request a new performance state for its device. Although, this requires some additional changes to be supported, which are being implemented from subsequent changes. Signed-off-by: Ulf Hansson Acked-by: Rafael J. 
Wysocki Signed-off-by: Viresh Kumar --- drivers/base/power/common.c | 21 +++++++++++++++++++++ include/linux/pm.h | 2 ++ include/linux/pm_domain.h | 6 ++++++ 3 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index 72115917e0bd..44ec20918a4d 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -228,3 +228,24 @@ void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) device_pm_check_callbacks(dev); } EXPORT_SYMBOL_GPL(dev_pm_domain_set); + +/** + * dev_pm_domain_set_performance_state - Request a new performance state. + * @dev: The device to make the request for. + * @state: Target performance state for the device. + * + * This function should be called when a new performance state needs to be + * requested for a device that is attached to a PM domain. Note that, the + * support for performance scaling for PM domains is optional. + * + * Returns 0 on success and when performance scaling isn't supported, negative + * error code on failure. + */ +int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state) +{ + if (dev->pm_domain && dev->pm_domain->set_performance_state) + return dev->pm_domain->set_performance_state(dev, state); + + return 0; +} +EXPORT_SYMBOL_GPL(dev_pm_domain_set_performance_state); diff --git a/include/linux/pm.h b/include/linux/pm.h index 1400c37b29c7..4c9f571609c8 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -719,6 +719,7 @@ extern void dev_pm_put_subsys_data(struct device *dev); * @activate: Called before executing probe routines for bus types and drivers. * @sync: Called after successful driver probe. * @dismiss: Called after unsuccessful driver probe and after driver removal. + * @set_performance_state: Called to request a new performance state. 
* * Power domains provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions instead of @@ -731,6 +732,7 @@ struct dev_pm_domain { int (*activate)(struct device *dev); void (*sync)(struct device *dev); void (*dismiss)(struct device *dev); + int (*set_performance_state)(struct device *dev, unsigned int state); }; /* diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f776fb93eaa0..bda2964a0a56 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -430,6 +430,7 @@ struct device *dev_pm_domain_attach_by_name(struct device *dev, void dev_pm_domain_detach(struct device *dev, bool power_off); int dev_pm_domain_start(struct device *dev); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); +int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state); #else static inline int dev_pm_domain_attach(struct device *dev, bool power_on) { @@ -452,6 +453,11 @@ static inline int dev_pm_domain_start(struct device *dev) } static inline void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) {} +static inline int dev_pm_domain_set_performance_state(struct device *dev, + unsigned int state) +{ + return 0; +} #endif #endif /* _LINUX_PM_DOMAIN_H */ -- cgit v1.2.3 From 248a38d5cc3f3505e6cfbbc0514435c9f1ba00af Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 25 Sep 2023 15:17:09 +0200 Subject: OPP: Add dev_pm_opp_add_dynamic() to allow more flexibility The dev_pm_opp_add() API is limited to add dynamic OPPs with a frequency and a voltage level. To enable more flexibility, let's add a new API, dev_pm_opp_add_dynamic() that takes a struct dev_pm_opp_data* instead of a list of in-parameters. 
Signed-off-by: Ulf Hansson Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 22 ++++++++++------------ drivers/opp/of.c | 10 ++++++---- drivers/opp/opp.h | 2 +- include/linux/pm_opp.h | 29 +++++++++++++++++++++++++---- 4 files changed, 42 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 919cc53bc02e..54b6138e1189 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2002,8 +2002,7 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * _opp_add_v1() - Allocate a OPP based on v1 bindings. * @opp_table: OPP table * @dev: device for which we do this operation - * @freq: Frequency in Hz for this OPP - * @u_volt: Voltage in uVolts for this OPP + * @data: The OPP data for the OPP to add * @dynamic: Dynamically added OPPs. * * This function adds an opp definition to the opp table and returns status. @@ -2021,10 +2020,10 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * -ENOMEM Memory allocation failure */ int _opp_add_v1(struct opp_table *opp_table, struct device *dev, - unsigned long freq, long u_volt, bool dynamic) + struct dev_pm_opp_data *data, bool dynamic) { struct dev_pm_opp *new_opp; - unsigned long tol; + unsigned long tol, u_volt = data->u_volt; int ret; if (!assert_single_clk(opp_table)) @@ -2035,7 +2034,7 @@ int _opp_add_v1(struct opp_table *opp_table, struct device *dev, return -ENOMEM; /* populate the opp table */ - new_opp->rates[0] = freq; + new_opp->rates[0] = data->freq; tol = u_volt * opp_table->voltage_tolerance_v1 / 100; new_opp->supplies[0].u_volt = u_volt; new_opp->supplies[0].u_volt_min = u_volt - tol; @@ -2825,10 +2824,9 @@ unlock: } /** - * dev_pm_opp_add() - Add an OPP table from a table definitions - * @dev: device for which we do this operation - * @freq: Frequency in Hz for this OPP - * @u_volt: Voltage in uVolts for this OPP + * dev_pm_opp_add_dynamic() - Add an OPP table from a table definitions + * @dev: The device for which we do this 
operation + * @data: The OPP data for the OPP to add * * This function adds an opp definition to the opp table and returns status. * The opp is made available by default and it can be controlled using @@ -2841,7 +2839,7 @@ unlock: * Duplicate OPPs (both freq and volt are same) and !opp->available * -ENOMEM Memory allocation failure */ -int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) +int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *data) { struct opp_table *opp_table; int ret; @@ -2853,13 +2851,13 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) /* Fix regulator count for dynamic OPPs */ opp_table->regulator_count = 1; - ret = _opp_add_v1(opp_table, dev, freq, u_volt, true); + ret = _opp_add_v1(opp_table, dev, data, true); if (ret) dev_pm_opp_put_opp_table(opp_table); return ret; } -EXPORT_SYMBOL_GPL(dev_pm_opp_add); +EXPORT_SYMBOL_GPL(dev_pm_opp_add_dynamic); /** * _opp_set_availability() - helper to set the availability of an opp diff --git a/drivers/opp/of.c b/drivers/opp/of.c index ada4963c7cfa..ade6d42cae46 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1077,13 +1077,15 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table) val = prop->value; while (nr) { - unsigned long freq = be32_to_cpup(val++) * 1000; - unsigned long volt = be32_to_cpup(val++); + struct dev_pm_opp_data data = { + .freq = be32_to_cpup(val++) * 1000, + .u_volt = be32_to_cpup(val++), + }; - ret = _opp_add_v1(opp_table, dev, freq, volt, false); + ret = _opp_add_v1(opp_table, dev, &data, false); if (ret) { dev_err(dev, "%s: Failed to add OPP %ld (%d)\n", - __func__, freq, ret); + __func__, data.freq, ret); goto remove_static_opp; } nr -= 2; diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 8a5ea38f3a3d..fefdf9845692 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -251,7 +251,7 @@ struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table); void 
_opp_free(struct dev_pm_opp *opp); int _opp_compare_key(struct opp_table *opp_table, struct dev_pm_opp *opp1, struct dev_pm_opp *opp2); int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table); -int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic); +int _opp_add_v1(struct opp_table *opp_table, struct device *dev, struct dev_pm_opp_data *data, bool dynamic); void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, int last_cpu); struct opp_table *_add_opp_table_indexed(struct device *dev, int index, bool getclk); void _put_opp_list_kref(struct opp_table *opp_table); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 91f87d7e807c..a8ee93ba41d8 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -92,6 +92,16 @@ struct dev_pm_opp_config { struct device ***virt_devs; }; +/** + * struct dev_pm_opp_data - The data to use to initialize an OPP. + * @freq: The clock rate in Hz for the OPP. + * @u_volt: The voltage in uV for the OPP. 
+ */ +struct dev_pm_opp_data { + unsigned long freq; + unsigned long u_volt; +}; + #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); @@ -152,8 +162,8 @@ struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, void dev_pm_opp_put(struct dev_pm_opp *opp); -int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt); +int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp); + void dev_pm_opp_remove(struct device *dev, unsigned long freq); void dev_pm_opp_remove_all_dynamic(struct device *dev); @@ -322,8 +332,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} -static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, - unsigned long u_volt) +static inline int +dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp) { return -EOPNOTSUPP; } @@ -519,6 +529,17 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta /* OPP Configuration helpers */ +static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, + unsigned long u_volt) +{ + struct dev_pm_opp_data data = { + .freq = freq, + .u_volt = u_volt, + }; + + return dev_pm_opp_add_dynamic(dev, &data); +} + /* Regulators helpers */ static inline int dev_pm_opp_set_regulators(struct device *dev, const char * const names[]) -- cgit v1.2.3 From a0242c81bb759ef03184be8eddcc7d5bdf36cc16 Mon Sep 17 00:00:00 2001 From: Krishna chaitanya chundru Date: Thu, 7 Sep 2023 11:30:31 +0530 Subject: OPP: Add dev_pm_opp_find_level_floor() Add dev_pm_opp_find_level_floor(), as is done for frequency and bandwidth. 
Signed-off-by: Krishna chaitanya chundru [ Viresh: Updated commit log and rearranged code ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 25 +++++++++++++++++++++++++ include/linux/pm_opp.h | 9 +++++++++ 2 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 8978e94c9ca1..cdac46698021 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -813,6 +813,31 @@ struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_ceil); +/** + * dev_pm_opp_find_level_floor() - Search for a rounded floor level + * @dev: device for which we do this operation + * @level: Start level + * + * Search for the matching floor *available* OPP from a starting level + * for a device. + * + * Return: matching *opp and refreshes *level accordingly, else returns + * ERR_PTR in case of error and should be handled using IS_ERR. Error return + * values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. 
+ */ +struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned long *level) +{ + return _find_key_floor(dev, level, 0, true, _read_level, NULL); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_floor); + /** * dev_pm_opp_find_bw_ceil() - Search for a rounded ceil bandwidth * @dev: device for which we do this operation diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 9ad168f4cbf1..ccd97bcef269 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -156,6 +156,9 @@ struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, unsigned int *level); +struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned long *level); + struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index); @@ -320,6 +323,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, + unsigned long *level) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index) { -- cgit v1.2.3 From 3166383da081461244918aeed7ad028ef11b17cc Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 25 Sep 2023 15:17:10 +0200 Subject: OPP: Extend dev_pm_opp_data with a level Let's extend the dev_pm_opp_data with a level variable, to allow users to specify a corresponding level (performance state) for a dynamically added OPP. 
Signed-off-by: Ulf Hansson Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 1 + include/linux/pm_opp.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 54b6138e1189..ca8d1304b508 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2035,6 +2035,7 @@ int _opp_add_v1(struct opp_table *opp_table, struct device *dev, /* populate the opp table */ new_opp->rates[0] = data->freq; + new_opp->level = data->level; tol = u_volt * opp_table->voltage_tolerance_v1 / 100; new_opp->supplies[0].u_volt = u_volt; new_opp->supplies[0].u_volt_min = u_volt - tol; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index a8ee93ba41d8..9ad168f4cbf1 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -94,10 +94,12 @@ struct dev_pm_opp_config { /** * struct dev_pm_opp_data - The data to use to initialize an OPP. + * @level: The performance level for the OPP. * @freq: The clock rate in Hz for the OPP. * @u_volt: The voltage in uV for the OPP. */ struct dev_pm_opp_data { + unsigned int level; unsigned long freq; unsigned long u_volt; }; -- cgit v1.2.3 From 869b6ea1609f655a43251bf41757aa44e5350a8f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Oct 2023 15:32:01 +0200 Subject: quota: Fix slow quotaoff Eric has reported that commit dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") heavily increases runtime of generic/270 xfstest for ext4 in nojournal mode. The reason for this is that ext4 in nojournal mode leaves dquots dirty until the last dqput() and thus the cleanup done in quota_release_workfn() has to write them all. Due to the way quota_release_workfn() is written this results in synchronize_srcu() call for each dirty dquot which makes the dquot cleanup when turning quotas off extremely slow. To be able to avoid synchronize_srcu() for each dirty dquot we need to rework how we track dquots to be cleaned up. 
Instead of keeping the last dquot reference while it is on releasing_dquots list, we drop it right away and mark the dquot with new DQ_RELEASING_B bit instead. This way we can remove dquot from releasing_dquots list when new reference to it is acquired and thus there's no need to call synchronize_srcu() each time we drop dq_list_lock. References: https://lore.kernel.org/all/ZRytn6CxFK2oECUt@debian-BULLSEYE-live-builder-AMD64 Reported-by: Eric Whitney Fixes: dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/quota/dquot.c | 66 ++++++++++++++++++++++++++++-------------------- include/linux/quota.h | 4 ++- include/linux/quotaops.h | 2 +- 3 files changed, 43 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9e72bfe8bbad..31e897ad5e6a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -233,19 +233,18 @@ static void put_quota_format(struct quota_format_type *fmt) * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. * - * When the last reference of a dquot will be dropped, the dquot will be - * added to releasing_dquots. We'd then queue work item which would call + * When the last reference of a dquot is dropped, the dquot is added to + * releasing_dquots. We'll then queue work item which will call * synchronize_srcu() and after that perform the final cleanup of all the - * dquots on the list. Both releasing_dquots and free_dquots use the - * dq_free list_head in the dquot struct. When a dquot is removed from - * releasing_dquots, a reference count is always subtracted, and if - * dq_count == 0 at that point, the dquot will be added to the free_dquots. + * dquots on the list. Each cleaned up dquot is moved to free_dquots list. 
+ * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot + * struct. * - * Unused dquots (dq_count == 0) are added to the free_dquots list when freed, - * and this list is searched whenever we need an available dquot. Dquots are - * removed from the list as soon as they are used again, and - * dqstats.free_dquots gives the number of dquots on the list. When - * dquot is invalidated it's completely released from memory. + * Unused and cleaned up dquots are in the free_dquots list and this list is + * searched whenever we need an available dquot. Dquots are removed from the + * list as soon as they are used again and dqstats.free_dquots gives the number + * of dquots on the list. When dquot is invalidated it's completely released + * from memory. * * Dirty dquots are added to the dqi_dirty_list of quota_info when mark * dirtied, and this list is searched when writing dirty dquots back to @@ -321,6 +320,7 @@ static inline void put_dquot_last(struct dquot *dquot) static inline void put_releasing_dquots(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &releasing_dquots); + set_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void remove_free_dquot(struct dquot *dquot) @@ -328,8 +328,10 @@ static inline void remove_free_dquot(struct dquot *dquot) if (list_empty(&dquot->dq_free)) return; list_del_init(&dquot->dq_free); - if (!atomic_read(&dquot->dq_count)) + if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags)) dqstats_dec(DQST_FREE_DQUOTS); + else + clear_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void put_inuse(struct dquot *dquot) @@ -581,12 +583,6 @@ restart: continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { - /* dquot in releasing_dquots, flush and retry */ - if (!list_empty(&dquot->dq_free)) { - spin_unlock(&dq_list_lock); - goto restart; - } - atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); /* @@ -605,6 +601,15 @@ restart: * restart. 
*/ goto restart; } + /* + * The last user already dropped its reference but dquot didn't + * get fully cleaned up yet. Restart the scan which flushes the + * work cleaning up released dquots. + */ + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + goto restart; + } /* * Quota now has no users and it has been written on last * dqput() @@ -696,6 +701,13 @@ int dquot_writeback_dquots(struct super_block *sb, int type) dq_dirty); WARN_ON(!dquot_active(dquot)); + /* If the dquot is releasing we should not touch it */ + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + flush_delayed_work(&quota_release_work); + spin_lock(&dq_list_lock); + continue; + } /* Now we have active dquot from which someone is * holding reference so we can safely just increase @@ -809,18 +821,18 @@ static void quota_release_workfn(struct work_struct *work) /* Exchange the list head to avoid livelock. */ list_replace_init(&releasing_dquots, &rls_head); spin_unlock(&dq_list_lock); + synchronize_srcu(&dquot_srcu); restart: - synchronize_srcu(&dquot_srcu); spin_lock(&dq_list_lock); while (!list_empty(&rls_head)) { dquot = list_first_entry(&rls_head, struct dquot, dq_free); - /* Dquot got used again? */ - if (atomic_read(&dquot->dq_count) > 1) { - remove_free_dquot(dquot); - atomic_dec(&dquot->dq_count); - continue; - } + WARN_ON_ONCE(atomic_read(&dquot->dq_count)); + /* + * Note that DQ_RELEASING_B protects us from racing with + * invalidate_dquots() calls so we are safe to work with the + * dquot even after we drop dq_list_lock. 
+ */ if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ @@ -834,7 +846,6 @@ restart: } /* Dquot is inactive and clean, now move it to free list */ remove_free_dquot(dquot); - atomic_dec(&dquot->dq_count); put_dquot_last(dquot); } spin_unlock(&dq_list_lock); @@ -875,6 +886,7 @@ void dqput(struct dquot *dquot) BUG_ON(!list_empty(&dquot->dq_free)); #endif put_releasing_dquots(dquot); + atomic_dec(&dquot->dq_count); spin_unlock(&dq_list_lock); queue_delayed_work(system_unbound_wq, &quota_release_work, 1); } @@ -963,7 +975,7 @@ we_slept: dqstats_inc(DQST_LOOKUPS); } /* Wait for dq_lock - after this we know that either dquot_release() is - * already finished or it will be canceled due to dq_count > 1 test */ + * already finished or it will be canceled due to dq_count > 0 test */ wait_on_dquot(dquot); /* Read the dquot / allocate space in quota file */ if (!dquot_active(dquot)) { diff --git a/include/linux/quota.h b/include/linux/quota.h index fd692b4a41d5..07071e64abf3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type) #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ -#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ +#define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting + * to be cleaned up */ +#define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\ * for the mask of entries set via SETQUOTA\ * quotactl. 
They are set under dq_data_lock\ * and the quota format handling dquot can\ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 11a4becff3a9..4fa4ef0a173a 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot) { if (test_bit(DQ_MOD_B, &dquot->dq_flags)) return true; - if (atomic_read(&dquot->dq_count) > 1) + if (atomic_read(&dquot->dq_count) > 0) return true; return false; } -- cgit v1.2.3 From 5a0b11a180a9b82b4437a4be1cf73530053f139b Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Fri, 6 Oct 2023 09:57:02 +0000 Subject: iommu/amd: Remove iommu_v2 module AMD GPU driver which was the only in-kernel user of iommu_v2 module removed dependency on iommu_v2 module. Also we are working on adding SVA support in AMD IOMMU driver. Device drivers are expected to use common SVA framework to enable device PASID/PRI features. Removing iommu_v2 module and then adding SVA simplifies the development. Hence remove iommu_v2 module. Cc: Alex Deucher Cc: Joerg Roedel Cc: Felix Kuehling Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Reviewed-by: Jerry Snitselaar Tested-by: Alex Deucher Link: https://lore.kernel.org/r/20231006095706.5694-2-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/Kconfig | 9 - drivers/iommu/amd/Makefile | 1 - drivers/iommu/amd/amd_iommu.h | 5 - drivers/iommu/amd/iommu.c | 40 -- drivers/iommu/amd/iommu_v2.c | 996 ------------------------------------------ include/linux/amd-iommu.h | 94 ---- 6 files changed, 1145 deletions(-) delete mode 100644 drivers/iommu/amd/iommu_v2.c (limited to 'include/linux') diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 9b5fc3356bf2..75132ae861a2 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -22,15 +22,6 @@ config AMD_IOMMU your BIOS for an option to enable it or if you have an IVRS ACPI table. 
-config AMD_IOMMU_V2 - tristate "AMD IOMMU Version 2 driver" - depends on AMD_IOMMU - select MMU_NOTIFIER - help - This option enables support for the AMD IOMMUv2 features of the IOMMU - hardware. Select this option if you want to use devices that support - the PCI PRI and PASID interface. - config AMD_IOMMU_DEBUGFS bool "Enable AMD IOMMU internals in DebugFS" depends on AMD_IOMMU && IOMMU_DEBUGFS diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index 773d8aa00283..f454fbb1569e 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o -obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 9df53961d5ef..5b8a1e2dd3d0 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -38,9 +38,6 @@ extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; -/* IOMMUv2 specific functions */ -struct iommu_domain; - bool amd_iommu_v2_supported(void); struct amd_iommu *get_amd_iommu(unsigned int idx); u8 amd_iommu_pc_get_max_banks(unsigned int idx); @@ -57,8 +54,6 @@ void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev); int amd_iommu_register_ppr_notifier(struct notifier_block *nb); int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); -void amd_iommu_domain_direct_map(struct iommu_domain *dom); -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); void amd_iommu_domain_update(struct protection_domain *domain); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f3448a2b6c0e..fd0d7b2f30dc 100644 --- 
a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2568,46 +2568,6 @@ int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); -void amd_iommu_domain_direct_map(struct iommu_domain *dom) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); - - if (domain->iop.pgtbl_cfg.tlb) - free_io_pgtable_ops(&domain->iop.iop.ops); - - spin_unlock_irqrestore(&domain->lock, flags); -} -EXPORT_SYMBOL(amd_iommu_domain_direct_map); - -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) -{ - struct protection_domain *pdom = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&pdom->lock, flags); - - /* - * Save us all sanity checks whether devices already in the - * domain support IOMMUv2. Just force that the domain has no - * devices attached when it is switched into IOMMUv2 mode. - */ - ret = -EBUSY; - if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK) - goto out; - - if (!pdom->gcr3_tbl) - ret = setup_gcr3_table(pdom, pasids); - -out: - spin_unlock_irqrestore(&pdom->lock, flags); - return ret; -} -EXPORT_SYMBOL(amd_iommu_domain_enable_v2); - static int __flush_pasid(struct protection_domain *domain, u32 pasid, u64 address, bool size) { diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c deleted file mode 100644 index 57c2fb1146e2..000000000000 --- a/drivers/iommu/amd/iommu_v2.c +++ /dev/null @@ -1,996 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. 
- * Author: Joerg Roedel - */ - -#define pr_fmt(fmt) "AMD-Vi: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "amd_iommu.h" - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Joerg Roedel "); - -#define PRI_QUEUE_SIZE 512 - -struct pri_queue { - atomic_t inflight; - bool finish; - int status; -}; - -struct pasid_state { - struct list_head list; /* For global state-list */ - refcount_t count; /* Reference count */ - unsigned mmu_notifier_count; /* Counting nested mmu_notifier - calls */ - struct mm_struct *mm; /* mm_struct for the faults */ - struct mmu_notifier mn; /* mmu_notifier handle */ - struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ - struct device_state *device_state; /* Link to our device_state */ - u32 pasid; /* PASID index */ - bool invalid; /* Used during setup and - teardown of the pasid */ - spinlock_t lock; /* Protect pri_queues and - mmu_notifer_count */ - wait_queue_head_t wq; /* To wait for count == 0 */ -}; - -struct device_state { - struct list_head list; - u32 sbdf; - atomic_t count; - struct pci_dev *pdev; - struct pasid_state **states; - struct iommu_domain *domain; - int pasid_levels; - int max_pasids; - amd_iommu_invalid_ppr_cb inv_ppr_cb; - amd_iommu_invalidate_ctx inv_ctx_cb; - spinlock_t lock; - wait_queue_head_t wq; -}; - -struct fault { - struct work_struct work; - struct device_state *dev_state; - struct pasid_state *state; - struct mm_struct *mm; - u64 address; - u32 pasid; - u16 tag; - u16 finish; - u16 flags; -}; - -static LIST_HEAD(state_list); -static DEFINE_SPINLOCK(state_lock); - -static struct workqueue_struct *iommu_wq; - -static void free_pasid_states(struct device_state *dev_state); - -static struct device_state *__get_device_state(u32 sbdf) -{ - struct device_state *dev_state; - - list_for_each_entry(dev_state, &state_list, list) { - if (dev_state->sbdf == sbdf) - return dev_state; - } - - return NULL; -} - -static struct 
device_state *get_device_state(u32 sbdf) -{ - struct device_state *dev_state; - unsigned long flags; - - spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(sbdf); - if (dev_state != NULL) - atomic_inc(&dev_state->count); - spin_unlock_irqrestore(&state_lock, flags); - - return dev_state; -} - -static void free_device_state(struct device_state *dev_state) -{ - struct iommu_group *group; - - /* Get rid of any remaining pasid states */ - free_pasid_states(dev_state); - - /* - * Wait until the last reference is dropped before freeing - * the device state. - */ - wait_event(dev_state->wq, !atomic_read(&dev_state->count)); - - /* - * First detach device from domain - No more PRI requests will arrive - * from that device after it is unbound from the IOMMUv2 domain. - */ - group = iommu_group_get(&dev_state->pdev->dev); - if (WARN_ON(!group)) - return; - - iommu_detach_group(dev_state->domain, group); - - iommu_group_put(group); - - /* Everything is down now, free the IOMMUv2 domain */ - iommu_domain_free(dev_state->domain); - - /* Finally get rid of the device-state */ - kfree(dev_state); -} - -static void put_device_state(struct device_state *dev_state) -{ - if (atomic_dec_and_test(&dev_state->count)) - wake_up(&dev_state->wq); -} - -/* Must be called under dev_state->lock */ -static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, - u32 pasid, bool alloc) -{ - struct pasid_state **root, **ptr; - int level, index; - - level = dev_state->pasid_levels; - root = dev_state->states; - - while (true) { - - index = (pasid >> (9 * level)) & 0x1ff; - ptr = &root[index]; - - if (level == 0) - break; - - if (*ptr == NULL) { - if (!alloc) - return NULL; - - *ptr = (void *)get_zeroed_page(GFP_ATOMIC); - if (*ptr == NULL) - return NULL; - } - - root = (struct pasid_state **)*ptr; - level -= 1; - } - - return ptr; -} - -static int set_pasid_state(struct device_state *dev_state, - struct pasid_state *pasid_state, - u32 pasid) -{ - struct 
pasid_state **ptr; - unsigned long flags; - int ret; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, true); - - ret = -ENOMEM; - if (ptr == NULL) - goto out_unlock; - - ret = -ENOMEM; - if (*ptr != NULL) - goto out_unlock; - - *ptr = pasid_state; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); - - return ret; -} - -static void clear_pasid_state(struct device_state *dev_state, u32 pasid) -{ - struct pasid_state **ptr; - unsigned long flags; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, true); - - if (ptr == NULL) - goto out_unlock; - - *ptr = NULL; - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); -} - -static struct pasid_state *get_pasid_state(struct device_state *dev_state, - u32 pasid) -{ - struct pasid_state **ptr, *ret = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, false); - - if (ptr == NULL) - goto out_unlock; - - ret = *ptr; - if (ret) - refcount_inc(&ret->count); - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); - - return ret; -} - -static void free_pasid_state(struct pasid_state *pasid_state) -{ - kfree(pasid_state); -} - -static void put_pasid_state(struct pasid_state *pasid_state) -{ - if (refcount_dec_and_test(&pasid_state->count)) - wake_up(&pasid_state->wq); -} - -static void put_pasid_state_wait(struct pasid_state *pasid_state) -{ - if (!refcount_dec_and_test(&pasid_state->count)) - wait_event(pasid_state->wq, !refcount_read(&pasid_state->count)); - free_pasid_state(pasid_state); -} - -static void unbind_pasid(struct pasid_state *pasid_state) -{ - struct iommu_domain *domain; - - domain = pasid_state->device_state->domain; - - /* - * Mark pasid_state as invalid, no more faults will we added to the - * work queue after this is visible everywhere. 
- */ - pasid_state->invalid = true; - - /* Make sure this is visible */ - smp_wmb(); - - /* After this the device/pasid can't access the mm anymore */ - amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); - - /* Make sure no more pending faults are in the queue */ - flush_workqueue(iommu_wq); -} - -static void free_pasid_states_level1(struct pasid_state **tbl) -{ - int i; - - for (i = 0; i < 512; ++i) { - if (tbl[i] == NULL) - continue; - - free_page((unsigned long)tbl[i]); - } -} - -static void free_pasid_states_level2(struct pasid_state **tbl) -{ - struct pasid_state **ptr; - int i; - - for (i = 0; i < 512; ++i) { - if (tbl[i] == NULL) - continue; - - ptr = (struct pasid_state **)tbl[i]; - free_pasid_states_level1(ptr); - } -} - -static void free_pasid_states(struct device_state *dev_state) -{ - struct pasid_state *pasid_state; - int i; - - for (i = 0; i < dev_state->max_pasids; ++i) { - pasid_state = get_pasid_state(dev_state, i); - if (pasid_state == NULL) - continue; - - put_pasid_state(pasid_state); - - /* Clear the pasid state so that the pasid can be re-used */ - clear_pasid_state(dev_state, pasid_state->pasid); - - /* - * This will call the mn_release function and - * unbind the PASID - */ - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state_wait(pasid_state); /* Reference taken in - amd_iommu_bind_pasid */ - - /* Drop reference taken in amd_iommu_bind_pasid */ - put_device_state(dev_state); - } - - if (dev_state->pasid_levels == 2) - free_pasid_states_level2(dev_state->states); - else if (dev_state->pasid_levels == 1) - free_pasid_states_level1(dev_state->states); - else - BUG_ON(dev_state->pasid_levels != 0); - - free_page((unsigned long)dev_state->states); -} - -static struct pasid_state *mn_to_state(struct mmu_notifier *mn) -{ - return container_of(mn, struct pasid_state, mn); -} - -static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) 
-{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - - if ((start ^ (end - 1)) < PAGE_SIZE) - amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, - start); - else - amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid); -} - -static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - bool run_inv_ctx_cb; - - might_sleep(); - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - run_inv_ctx_cb = !pasid_state->invalid; - - if (run_inv_ctx_cb && dev_state->inv_ctx_cb) - dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); - - unbind_pasid(pasid_state); -} - -static const struct mmu_notifier_ops iommu_mn = { - .release = mn_release, - .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs, -}; - -static void set_pri_tag_status(struct pasid_state *pasid_state, - u16 tag, int status) -{ - unsigned long flags; - - spin_lock_irqsave(&pasid_state->lock, flags); - pasid_state->pri[tag].status = status; - spin_unlock_irqrestore(&pasid_state->lock, flags); -} - -static void finish_pri_tag(struct device_state *dev_state, - struct pasid_state *pasid_state, - u16 tag) -{ - unsigned long flags; - - spin_lock_irqsave(&pasid_state->lock, flags); - if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && - pasid_state->pri[tag].finish) { - amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, - pasid_state->pri[tag].status, tag); - pasid_state->pri[tag].finish = false; - pasid_state->pri[tag].status = PPR_SUCCESS; - } - spin_unlock_irqrestore(&pasid_state->lock, flags); -} - -static void handle_fault_error(struct fault *fault) -{ - int status; - - if (!fault->dev_state->inv_ppr_cb) { - set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); - return; - } - - status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev, - fault->pasid, - 
fault->address, - fault->flags); - switch (status) { - case AMD_IOMMU_INV_PRI_RSP_SUCCESS: - set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS); - break; - case AMD_IOMMU_INV_PRI_RSP_INVALID: - set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); - break; - case AMD_IOMMU_INV_PRI_RSP_FAIL: - set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE); - break; - default: - BUG(); - } -} - -static bool access_error(struct vm_area_struct *vma, struct fault *fault) -{ - unsigned long requested = 0; - - if (fault->flags & PPR_FAULT_EXEC) - requested |= VM_EXEC; - - if (fault->flags & PPR_FAULT_READ) - requested |= VM_READ; - - if (fault->flags & PPR_FAULT_WRITE) - requested |= VM_WRITE; - - return (requested & ~vma->vm_flags) != 0; -} - -static void do_fault(struct work_struct *work) -{ - struct fault *fault = container_of(work, struct fault, work); - struct vm_area_struct *vma; - vm_fault_t ret = VM_FAULT_ERROR; - unsigned int flags = 0; - struct mm_struct *mm; - u64 address; - - mm = fault->state->mm; - address = fault->address; - - if (fault->flags & PPR_FAULT_USER) - flags |= FAULT_FLAG_USER; - if (fault->flags & PPR_FAULT_WRITE) - flags |= FAULT_FLAG_WRITE; - flags |= FAULT_FLAG_REMOTE; - - mmap_read_lock(mm); - vma = vma_lookup(mm, address); - if (!vma) - /* failed to get a vma in the right range */ - goto out; - - /* Check if we have the right permissions on the vma */ - if (access_error(vma, fault)) - goto out; - - ret = handle_mm_fault(vma, address, flags, NULL); -out: - mmap_read_unlock(mm); - - if (ret & VM_FAULT_ERROR) - /* failed to service fault */ - handle_fault_error(fault); - - finish_pri_tag(fault->dev_state, fault->state, fault->tag); - - put_pasid_state(fault->state); - - kfree(fault); -} - -static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) -{ - struct amd_iommu_fault *iommu_fault; - struct pasid_state *pasid_state; - struct device_state *dev_state; - struct pci_dev *pdev = NULL; - unsigned long flags; - 
struct fault *fault; - bool finish; - u16 tag, devid, seg_id; - int ret; - - iommu_fault = data; - tag = iommu_fault->tag & 0x1ff; - finish = (iommu_fault->tag >> 9) & 1; - - seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf); - devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf); - pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid), - devid & 0xff); - if (!pdev) - return -ENODEV; - - ret = NOTIFY_DONE; - - /* In kdump kernel pci dev is not initialized yet -> send INVALID */ - if (amd_iommu_is_attach_deferred(&pdev->dev)) { - amd_iommu_complete_ppr(pdev, iommu_fault->pasid, - PPR_INVALID, tag); - goto out; - } - - dev_state = get_device_state(iommu_fault->sbdf); - if (dev_state == NULL) - goto out; - - pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); - if (pasid_state == NULL || pasid_state->invalid) { - /* We know the device but not the PASID -> send INVALID */ - amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, - PPR_INVALID, tag); - goto out_drop_state; - } - - spin_lock_irqsave(&pasid_state->lock, flags); - atomic_inc(&pasid_state->pri[tag].inflight); - if (finish) - pasid_state->pri[tag].finish = true; - spin_unlock_irqrestore(&pasid_state->lock, flags); - - fault = kzalloc(sizeof(*fault), GFP_ATOMIC); - if (fault == NULL) { - /* We are OOM - send success and let the device re-fault */ - finish_pri_tag(dev_state, pasid_state, tag); - goto out_drop_state; - } - - fault->dev_state = dev_state; - fault->address = iommu_fault->address; - fault->state = pasid_state; - fault->tag = tag; - fault->finish = finish; - fault->pasid = iommu_fault->pasid; - fault->flags = iommu_fault->flags; - INIT_WORK(&fault->work, do_fault); - - queue_work(iommu_wq, &fault->work); - - ret = NOTIFY_OK; - -out_drop_state: - - if (ret != NOTIFY_OK && pasid_state) - put_pasid_state(pasid_state); - - put_device_state(dev_state); - -out: - pci_dev_put(pdev); - return ret; -} - -static struct notifier_block ppr_nb = { - .notifier_call = ppr_notifier, -}; - -int 
amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - struct mm_struct *mm; - u32 sbdf; - int ret; - - might_sleep(); - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - dev_state = get_device_state(sbdf); - - if (dev_state == NULL) - return -EINVAL; - - ret = -EINVAL; - if (pasid >= dev_state->max_pasids) - goto out; - - ret = -ENOMEM; - pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); - if (pasid_state == NULL) - goto out; - - - refcount_set(&pasid_state->count, 1); - init_waitqueue_head(&pasid_state->wq); - spin_lock_init(&pasid_state->lock); - - mm = get_task_mm(task); - pasid_state->mm = mm; - pasid_state->device_state = dev_state; - pasid_state->pasid = pasid; - pasid_state->invalid = true; /* Mark as valid only if we are - done with setting up the pasid */ - pasid_state->mn.ops = &iommu_mn; - - if (pasid_state->mm == NULL) - goto out_free; - - ret = mmu_notifier_register(&pasid_state->mn, mm); - if (ret) - goto out_free; - - ret = set_pasid_state(dev_state, pasid_state, pasid); - if (ret) - goto out_unregister; - - ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, - __pa(pasid_state->mm->pgd)); - if (ret) - goto out_clear_state; - - /* Now we are ready to handle faults */ - pasid_state->invalid = false; - - /* - * Drop the reference to the mm_struct here. We rely on the - * mmu_notifier release call-back to inform us when the mm - * is going away. 
- */ - mmput(mm); - - return 0; - -out_clear_state: - clear_pasid_state(dev_state, pasid); - -out_unregister: - mmu_notifier_unregister(&pasid_state->mn, mm); - mmput(mm); - -out_free: - free_pasid_state(pasid_state); - -out: - put_device_state(dev_state); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_bind_pasid); - -void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - u32 sbdf; - - might_sleep(); - - if (!amd_iommu_v2_supported()) - return; - - sbdf = get_pci_sbdf_id(pdev); - dev_state = get_device_state(sbdf); - if (dev_state == NULL) - return; - - if (pasid >= dev_state->max_pasids) - goto out; - - pasid_state = get_pasid_state(dev_state, pasid); - if (pasid_state == NULL) - goto out; - /* - * Drop reference taken here. We are safe because we still hold - * the reference taken in the amd_iommu_bind_pasid function. - */ - put_pasid_state(pasid_state); - - /* Clear the pasid state so that the pasid can be re-used */ - clear_pasid_state(dev_state, pasid_state->pasid); - - /* - * Call mmu_notifier_unregister to drop our reference - * to pasid_state->mm - */ - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state_wait(pasid_state); /* Reference taken in - amd_iommu_bind_pasid */ -out: - /* Drop reference taken in this function */ - put_device_state(dev_state); - - /* Drop reference taken in amd_iommu_bind_pasid */ - put_device_state(dev_state); -} -EXPORT_SYMBOL(amd_iommu_unbind_pasid); - -int amd_iommu_init_device(struct pci_dev *pdev, int pasids) -{ - struct device_state *dev_state; - struct iommu_group *group; - unsigned long flags; - int ret, tmp; - u32 sbdf; - - might_sleep(); - - /* - * When memory encryption is active the device is likely not in a - * direct-mapped domain. Forbid using IOMMUv2 functionality for now. 
- */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return -ENODEV; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - if (pasids <= 0 || pasids > (PASID_MASK + 1)) - return -EINVAL; - - sbdf = get_pci_sbdf_id(pdev); - - dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); - if (dev_state == NULL) - return -ENOMEM; - - spin_lock_init(&dev_state->lock); - init_waitqueue_head(&dev_state->wq); - dev_state->pdev = pdev; - dev_state->sbdf = sbdf; - - tmp = pasids; - for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) - dev_state->pasid_levels += 1; - - atomic_set(&dev_state->count, 1); - dev_state->max_pasids = pasids; - - ret = -ENOMEM; - dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); - if (dev_state->states == NULL) - goto out_free_dev_state; - - dev_state->domain = iommu_domain_alloc(&pci_bus_type); - if (dev_state->domain == NULL) - goto out_free_states; - - /* See iommu_is_default_domain() */ - dev_state->domain->type = IOMMU_DOMAIN_IDENTITY; - amd_iommu_domain_direct_map(dev_state->domain); - - ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); - if (ret) - goto out_free_domain; - - group = iommu_group_get(&pdev->dev); - if (!group) { - ret = -EINVAL; - goto out_free_domain; - } - - ret = iommu_attach_group(dev_state->domain, group); - if (ret != 0) - goto out_drop_group; - - iommu_group_put(group); - - spin_lock_irqsave(&state_lock, flags); - - if (__get_device_state(sbdf) != NULL) { - spin_unlock_irqrestore(&state_lock, flags); - ret = -EBUSY; - goto out_free_domain; - } - - list_add_tail(&dev_state->list, &state_list); - - spin_unlock_irqrestore(&state_lock, flags); - - return 0; - -out_drop_group: - iommu_group_put(group); - -out_free_domain: - iommu_domain_free(dev_state->domain); - -out_free_states: - free_page((unsigned long)dev_state->states); - -out_free_dev_state: - kfree(dev_state); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_init_device); - -void amd_iommu_free_device(struct pci_dev *pdev) -{ - struct device_state 
*dev_state; - unsigned long flags; - u32 sbdf; - - if (!amd_iommu_v2_supported()) - return; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) { - spin_unlock_irqrestore(&state_lock, flags); - return; - } - - list_del(&dev_state->list); - - spin_unlock_irqrestore(&state_lock, flags); - - put_device_state(dev_state); - free_device_state(dev_state); -} -EXPORT_SYMBOL(amd_iommu_free_device); - -int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb) -{ - struct device_state *dev_state; - unsigned long flags; - u32 sbdf; - int ret; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - ret = -EINVAL; - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) - goto out_unlock; - - dev_state->inv_ppr_cb = cb; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&state_lock, flags); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb); - -int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb) -{ - struct device_state *dev_state; - unsigned long flags; - u32 sbdf; - int ret; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - ret = -EINVAL; - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) - goto out_unlock; - - dev_state->inv_ctx_cb = cb; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&state_lock, flags); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); - -static int __init amd_iommu_v2_init(void) -{ - int ret; - - if (!amd_iommu_v2_supported()) { - pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n"); - /* - * Load anyway to provide the symbols to other modules - * which may use AMD IOMMUv2 optionally. 
- */ - return 0; - } - - ret = -ENOMEM; - iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); - if (iommu_wq == NULL) - goto out; - - amd_iommu_register_ppr_notifier(&ppr_nb); - - pr_info("AMD IOMMUv2 loaded and initialized\n"); - - return 0; - -out: - return ret; -} - -static void __exit amd_iommu_v2_exit(void) -{ - struct device_state *dev_state, *next; - unsigned long flags; - LIST_HEAD(freelist); - - if (!amd_iommu_v2_supported()) - return; - - amd_iommu_unregister_ppr_notifier(&ppr_nb); - - flush_workqueue(iommu_wq); - - /* - * The loop below might call flush_workqueue(), so call - * destroy_workqueue() after it - */ - spin_lock_irqsave(&state_lock, flags); - - list_for_each_entry_safe(dev_state, next, &state_list, list) { - WARN_ON_ONCE(1); - - put_device_state(dev_state); - list_del(&dev_state->list); - list_add_tail(&dev_state->list, &freelist); - } - - spin_unlock_irqrestore(&state_lock, flags); - - /* - * Since free_device_state waits on the count to be zero, - * we need to free dev_state outside the spinlock. - */ - list_for_each_entry_safe(dev_state, next, &freelist, list) { - list_del(&dev_state->list); - free_device_state(dev_state); - } - - destroy_workqueue(iommu_wq); -} - -module_init(amd_iommu_v2_init); -module_exit(amd_iommu_v2_exit); diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 99a5201d9e62..35ca00b09384 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -33,84 +33,6 @@ struct pci_dev; extern int amd_iommu_detect(void); -/** - * amd_iommu_init_device() - Init device for use with IOMMUv2 driver - * @pdev: The PCI device to initialize - * @pasids: Number of PASIDs to support for this device - * - * This function does all setup for the device pdev so that it can be - * used with IOMMUv2. - * Returns 0 on success or negative value on error. 
- */ -extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); - -/** - * amd_iommu_free_device() - Free all IOMMUv2 related device resources - * and disable IOMMUv2 usage for this device - * @pdev: The PCI device to disable IOMMUv2 usage for' - */ -extern void amd_iommu_free_device(struct pci_dev *pdev); - -/** - * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device - * @pdev: The PCI device to bind the task to - * @pasid: The PASID on the device the task should be bound to - * @task: the task to bind - * - * The function returns 0 on success or a negative value on error. - */ -extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task); - -/** - * amd_iommu_unbind_pasid() - Unbind a PASID from its task on - * a device - * @pdev: The device of the PASID - * @pasid: The PASID to unbind - * - * When this function returns the device is no longer using the PASID - * and the PASID is no longer bound to its task. - */ -extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid); - -/** - * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed - * PRI requests - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - * - * The IOMMUv2 driver invokes this call-back when it is unable to - * successfully handle a PRI request. The device driver can then decide - * which PRI response the device should see. Possible return values for - * the call-back are: - * - * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device - * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device - * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, - * the device is required to disable - * PRI when it receives this response - * - * The function returns 0 on success or negative value on error. 
- */ -#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 -#define AMD_IOMMU_INV_PRI_RSP_INVALID 1 -#define AMD_IOMMU_INV_PRI_RSP_FAIL 2 - -typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, - u32 pasid, - unsigned long address, - u16); - -extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb); - -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) - /** * amd_iommu_device_info() - Get information about IOMMUv2 support of a * PCI device @@ -137,22 +59,6 @@ struct amd_iommu_device_info { extern int amd_iommu_device_info(struct pci_dev *pdev, struct amd_iommu_device_info *info); -/** - * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating - * a pasid context. This call-back is - * invoked when the IOMMUv2 driver needs to - * invalidate a PASID context, for example - * because the task that is bound to that - * context is about to exit. - * - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - */ - -typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid); - -extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb); #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } -- cgit v1.2.3 From 37b282fa04dd7b9bfa7c290fe07ef1730444debb Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Fri, 6 Oct 2023 09:57:04 +0000 Subject: iommu/amd: Remove amd_iommu_device_info() No one is using this function. Hence remove it. Also move PCI device feature detection flags to amd_iommu_types.h as its only used inside AMD IOMMU driver. 
Signed-off-by: Vasant Hegde Reviewed-by: Jason Gunthorpe Reviewed-by: Jerry Snitselaar Tested-by: Alex Deucher Link: https://lore.kernel.org/r/20231006095706.5694-4-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu_types.h | 8 +++++++ drivers/iommu/amd/iommu.c | 42 ------------------------------------- include/linux/amd-iommu.h | 26 ----------------------- 3 files changed, 8 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 25b731f3d984..e742006f2885 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -786,6 +786,14 @@ struct devid_map { bool cmd_line; }; +#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ +/* Device may request execution on memory pages */ +#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 +/* Device may request super-user privileges */ +#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 + /* * This struct contains device specific data for the IOMMU */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 5abf181f5ecd..8f43d89a98db 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2741,48 +2741,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, } EXPORT_SYMBOL(amd_iommu_complete_ppr); -int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info) -{ - int max_pasids; - int pos; - - if (pdev == NULL || info == NULL) - return -EINVAL; - - if (!amd_iommu_v2_supported()) - return -EINVAL; - - memset(info, 0, sizeof(*info)); - - if (pci_ats_supported(pdev)) - info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (pos) - info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; - - pos = 
pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); - if (pos) { - int features; - - max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1)); - max_pasids = min(max_pasids, (1 << 20)); - - info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; - info->max_pasids = min(pci_max_pasids(pdev), max_pasids); - - features = pci_pasid_features(pdev); - if (features & PCI_PASID_CAP_EXEC) - info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; - if (features & PCI_PASID_CAP_PRIV) - info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; - } - - return 0; -} -EXPORT_SYMBOL(amd_iommu_device_info); - #ifdef CONFIG_IRQ_REMAP /***************************************************************************** diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 35ca00b09384..dc7ed2f46886 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -33,32 +33,6 @@ struct pci_dev; extern int amd_iommu_detect(void); -/** - * amd_iommu_device_info() - Get information about IOMMUv2 support of a - * PCI device - * @pdev: PCI device to query information from - * @info: A pointer to an amd_iommu_device_info structure which will contain - * the information about the PCI device - * - * Returns 0 on success, negative value on error - */ - -#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ -#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution - on memory pages */ -#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request - super-user privileges */ - -struct amd_iommu_device_info { - int max_pasids; - u32 flags; -}; - -extern int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info); - #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } -- cgit v1.2.3 From 1609626c32c4538439f6333d0b6c912af9f13b77 Mon Sep 17 00:00:00 2001 From: Sudeep 
Holla Date: Thu, 5 Oct 2023 15:44:54 +0100 Subject: firmware: arm_ffa: Update the FF-A command list with v1.1 additions Arm Firmware Framework for A-profile(FFA) v1.1 introduces notifications and indirect messaging based upon notifications support and extends some of the memory interfaces. Let us add all the newly supported FF-A function IDs in the spec. Also update to the error values and associated handling. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-1-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 1 + include/linux/arm_ffa.h | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 7cd6b1564e80..a64512388ea5 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -64,6 +64,7 @@ static const int ffa_linux_errmap[] = { -EACCES, /* FFA_RET_DENIED */ -EAGAIN, /* FFA_RET_RETRY */ -ECANCELED, /* FFA_RET_ABORTED */ + -ENODATA, /* FFA_RET_NO_DATA */ }; static inline int ffa_to_linux_errno(int errno) diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cc060da51bec..2ea1717a0825 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -20,6 +20,7 @@ #define FFA_ERROR FFA_SMC_32(0x60) #define FFA_SUCCESS FFA_SMC_32(0x61) +#define FFA_FN64_SUCCESS FFA_SMC_64(0x61) #define FFA_INTERRUPT FFA_SMC_32(0x62) #define FFA_VERSION FFA_SMC_32(0x63) #define FFA_FEATURES FFA_SMC_32(0x64) @@ -54,6 +55,23 @@ #define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A) #define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B) #define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C) +#define FFA_NOTIFICATION_BITMAP_CREATE FFA_SMC_32(0x7D) +#define FFA_NOTIFICATION_BITMAP_DESTROY FFA_SMC_32(0x7E) +#define FFA_NOTIFICATION_BIND FFA_SMC_32(0x7F) +#define FFA_NOTIFICATION_UNBIND FFA_SMC_32(0x80) +#define FFA_NOTIFICATION_SET FFA_SMC_32(0x81) +#define FFA_NOTIFICATION_GET FFA_SMC_32(0x82) +#define 
FFA_NOTIFICATION_INFO_GET FFA_SMC_32(0x83) +#define FFA_FN64_NOTIFICATION_INFO_GET FFA_SMC_64(0x83) +#define FFA_RX_ACQUIRE FFA_SMC_32(0x84) +#define FFA_SPM_ID_GET FFA_SMC_32(0x85) +#define FFA_MSG_SEND2 FFA_SMC_32(0x86) +#define FFA_SECONDARY_EP_REGISTER FFA_SMC_32(0x87) +#define FFA_FN64_SECONDARY_EP_REGISTER FFA_SMC_64(0x87) +#define FFA_MEM_PERM_GET FFA_SMC_32(0x88) +#define FFA_FN64_MEM_PERM_GET FFA_SMC_64(0x88) +#define FFA_MEM_PERM_SET FFA_SMC_32(0x89) +#define FFA_FN64_MEM_PERM_SET FFA_SMC_64(0x89) /* * For some calls it is necessary to use SMC64 to pass or return 64-bit values. @@ -76,6 +94,7 @@ #define FFA_RET_DENIED (-6) #define FFA_RET_RETRY (-7) #define FFA_RET_ABORTED (-8) +#define FFA_RET_NO_DATA (-9) /* FFA version encoding */ #define FFA_MAJOR_VERSION_MASK GENMASK(30, 16) @@ -86,6 +105,7 @@ (FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \ FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) #define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) +#define FFA_VERSION_1_1 FFA_PACK_VERSION_INFO(1, 1) /** * FF-A specification mentions explicitly about '4K pages'. This should -- cgit v1.2.3 From fe2ddb6b42358ad25a6aed30512fb284522335f3 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:44:57 +0100 Subject: firmware: arm_ffa: Implement the FFA_RUN interface FFA_RUN is used by a scheduler to allocate CPU cycles to a target endpoint execution context specified in the target information parameter. If the endpoint execution context is in the waiting/blocked state, it transitions to the running state. Expose the ability to call FFA_RUN in order to give any partition in the system cpu cycles to perform IMPDEF functionality. 
Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-4-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 22 ++++++++++++++++++++++ include/linux/arm_ffa.h | 5 +++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 20f8f4ca8e89..82e54d231074 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -616,6 +616,23 @@ static int ffa_notification_bind_common(u16 dst_id, u64 bitmap, return 0; } +static int ffa_run(struct ffa_device *dev, u16 vcpu) +{ + ffa_value_t ret; + u32 target = dev->vm_id << 16 | vcpu; + + invoke_ffa_fn((ffa_value_t){ .a0 = FFA_RUN, .a1 = target, }, &ret); + + while (ret.a0 == FFA_INTERRUPT) + invoke_ffa_fn((ffa_value_t){ .a0 = FFA_RUN, .a1 = ret.a1, }, + &ret); + + if (ret.a0 == FFA_ERROR) + return ffa_to_linux_errno((int)ret.a2); + + return 0; +} + static void ffa_set_up_mem_ops_native_flag(void) { if (!ffa_features(FFA_FN_NATIVE(MEM_LEND), 0, NULL, NULL) || @@ -700,10 +717,15 @@ static const struct ffa_mem_ops ffa_drv_mem_ops = { .memory_lend = ffa_memory_lend, }; +static const struct ffa_cpu_ops ffa_drv_cpu_ops = { + .run = ffa_run, +}; + static const struct ffa_ops ffa_drv_ops = { .info_ops = &ffa_drv_info_ops, .msg_ops = &ffa_drv_msg_ops, .mem_ops = &ffa_drv_mem_ops, + .cpu_ops = &ffa_drv_cpu_ops, }; void ffa_device_match_uuid(struct ffa_device *ffa_dev, const uuid_t *uuid) diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 2ea1717a0825..12fd134bf670 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -387,10 +387,15 @@ struct ffa_mem_ops { int (*memory_lend)(struct ffa_mem_ops_args *args); }; +struct ffa_cpu_ops { + int (*run)(struct ffa_device *dev, u16 vcpu); +}; + struct ffa_ops { const struct ffa_info_ops *info_ops; const struct ffa_msg_ops *msg_ops; const struct ffa_mem_ops *mem_ops; + const struct ffa_cpu_ops *cpu_ops; }; 
#endif /* _LINUX_ARM_FFA_H */ -- cgit v1.2.3 From 0184450b8b1e7734110472616c4758839e1aff96 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:02 +0100 Subject: firmware: arm_ffa: Add schedule receiver callback mechanism Enable client drivers to register a callback function that will be called when one or more notifications are pending for a target partition as part of schedule receiver interrupt handling. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-9-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 103 ++++++++++++++++++++++++++++++++++++-- include/linux/arm_ffa.h | 8 +++ 2 files changed, 108 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index f8d01840f5ec..04cdb49cc78b 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "common.h" @@ -99,6 +100,8 @@ struct ffa_drv_info { struct ffa_pcpu_irq __percpu *irq_pcpu; struct workqueue_struct *notif_pcpu_wq; struct work_struct irq_work; + struct xarray partition_info; + unsigned int partition_count; }; static struct ffa_drv_info *drv_info; @@ -694,9 +697,26 @@ static int ffa_notification_get(u32 flags, struct ffa_notify_bitmaps *notify) return 0; } -static void __do_sched_recv_cb(u16 partition_id, u16 vcpu, bool is_per_vcpu) +struct ffa_dev_part_info { + ffa_sched_recv_cb callback; + void *cb_data; + rwlock_t rw_lock; +}; + +static void __do_sched_recv_cb(u16 part_id, u16 vcpu, bool is_per_vcpu) { - pr_err("Callback for partition 0x%x failed.\n", partition_id); + struct ffa_dev_part_info *partition; + ffa_sched_recv_cb callback; + void *cb_data; + + partition = xa_load(&drv_info->partition_info, part_id); + read_lock(&partition->rw_lock); + callback = partition->callback; + cb_data = partition->cb_data; + read_unlock(&partition->rw_lock); + + if (callback) 
+ callback(vcpu, is_per_vcpu, cb_data); } static void ffa_notification_info_get(void) @@ -845,6 +865,39 @@ static int ffa_memory_lend(struct ffa_mem_ops_args *args) return ffa_memory_ops(FFA_MEM_LEND, args); } +static int ffa_sched_recv_cb_update(u16 part_id, ffa_sched_recv_cb callback, + void *cb_data, bool is_registration) +{ + struct ffa_dev_part_info *partition; + bool cb_valid; + + partition = xa_load(&drv_info->partition_info, part_id); + write_lock(&partition->rw_lock); + + cb_valid = !!partition->callback; + if (!(is_registration ^ cb_valid)) { + write_unlock(&partition->rw_lock); + return -EINVAL; + } + + partition->callback = callback; + partition->cb_data = cb_data; + + write_unlock(&partition->rw_lock); + return 0; +} + +static int ffa_sched_recv_cb_register(struct ffa_device *dev, + ffa_sched_recv_cb cb, void *cb_data) +{ + return ffa_sched_recv_cb_update(dev->vm_id, cb, cb_data, true); +} + +static int ffa_sched_recv_cb_unregister(struct ffa_device *dev) +{ + return ffa_sched_recv_cb_update(dev->vm_id, NULL, NULL, false); +} + static const struct ffa_info_ops ffa_drv_info_ops = { .api_version_get = ffa_api_version_get, .partition_info_get = ffa_partition_info_get, @@ -865,11 +918,17 @@ static const struct ffa_cpu_ops ffa_drv_cpu_ops = { .run = ffa_run, }; +static const struct ffa_notifier_ops ffa_drv_notifier_ops = { + .sched_recv_cb_register = ffa_sched_recv_cb_register, + .sched_recv_cb_unregister = ffa_sched_recv_cb_unregister, +}; + static const struct ffa_ops ffa_drv_ops = { .info_ops = &ffa_drv_info_ops, .msg_ops = &ffa_drv_msg_ops, .mem_ops = &ffa_drv_mem_ops, .cpu_ops = &ffa_drv_cpu_ops, + .notifier_ops = &ffa_drv_notifier_ops, }; void ffa_device_match_uuid(struct ffa_device *ffa_dev, const uuid_t *uuid) @@ -900,6 +959,7 @@ static void ffa_setup_partitions(void) int count, idx; uuid_t uuid; struct ffa_device *ffa_dev; + struct ffa_dev_part_info *info; struct ffa_partition_info *pbuf, *tpbuf; count = ffa_partition_probe(&uuid_null, &pbuf); @@ 
-908,6 +968,7 @@ static void ffa_setup_partitions(void) return; } + xa_init(&drv_info->partition_info); for (idx = 0, tpbuf = pbuf; idx < count; idx++, tpbuf++) { import_uuid(&uuid, (u8 *)tpbuf->uuid); @@ -927,10 +988,42 @@ static void ffa_setup_partitions(void) if (drv_info->version > FFA_VERSION_1_0 && !(tpbuf->properties & FFA_PARTITION_AARCH64_EXEC)) ffa_mode_32bit_set(ffa_dev); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ffa_device_unregister(ffa_dev); + continue; + } + xa_store(&drv_info->partition_info, tpbuf->id, info, GFP_KERNEL); } + drv_info->partition_count = count; + kfree(pbuf); } +static void ffa_partitions_cleanup(void) +{ + struct ffa_dev_part_info **info; + int idx, count = drv_info->partition_count; + + if (!count) + return; + + info = kcalloc(count, sizeof(**info), GFP_KERNEL); + if (!info) + return; + + xa_extract(&drv_info->partition_info, (void **)info, 0, VM_ID_MASK, + count, XA_PRESENT); + + for (idx = 0; idx < count; idx++) + kfree(info[idx]); + kfree(info); + + drv_info->partition_count = 0; + xa_destroy(&drv_info->partition_info); +} + /* FFA FEATURE IDs */ #define FFA_FEAT_NOTIFICATION_PENDING_INT (1) #define FFA_FEAT_SCHEDULE_RECEIVER_INT (2) @@ -1164,9 +1257,11 @@ static int __init ffa_init(void) ret = ffa_notifications_setup(); if (ret) - goto free_pages; + goto partitions_cleanup; return 0; +partitions_cleanup: + ffa_partitions_cleanup(); free_pages: if (drv_info->tx_buffer) free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE); @@ -1182,9 +1277,11 @@ subsys_initcall(ffa_init); static void __exit ffa_exit(void) { ffa_notifications_cleanup(); + ffa_partitions_cleanup(); ffa_rxtx_unmap(drv_info->vm_id); free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE); free_pages_exact(drv_info->rx_buffer, RXTX_BUFFER_SIZE); + xa_destroy(&drv_info->partition_info); kfree(drv_info); arm_ffa_bus_exit(); } diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 12fd134bf670..f9cf6114ef82 100644 --- 
a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -391,11 +391,19 @@ struct ffa_cpu_ops { int (*run)(struct ffa_device *dev, u16 vcpu); }; +typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data); +struct ffa_notifier_ops { + int (*sched_recv_cb_register)(struct ffa_device *dev, + ffa_sched_recv_cb cb, void *cb_data); + int (*sched_recv_cb_unregister)(struct ffa_device *dev); +}; + struct ffa_ops { const struct ffa_info_ops *info_ops; const struct ffa_msg_ops *msg_ops; const struct ffa_mem_ops *mem_ops; const struct ffa_cpu_ops *cpu_ops; + const struct ffa_notifier_ops *notifier_ops; }; #endif /* _LINUX_ARM_FFA_H */ -- cgit v1.2.3 From e0573444edbf4ee7e3c191d3d08a4ccbd26628be Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:03 +0100 Subject: firmware: arm_ffa: Add interfaces to request notification callbacks Add interface to the FFA driver to allow for client drivers to request and relinquish a notification as well as provide a callback for the notification. 
Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-10-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 145 ++++++++++++++++++++++++++++++++++++++ include/linux/arm_ffa.h | 5 ++ 2 files changed, 150 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 04cdb49cc78b..1c3b34eb06e4 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -27,11 +27,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -57,6 +59,8 @@ */ #define RXTX_BUFFER_SIZE SZ_4K +#define FFA_MAX_NOTIFICATIONS 64 + static ffa_fn *invoke_ffa_fn; static const int ffa_linux_errmap[] = { @@ -102,6 +106,8 @@ struct ffa_drv_info { struct work_struct irq_work; struct xarray partition_info; unsigned int partition_count; + DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS)); + struct mutex notify_lock; /* lock to protect notifier hashtable */ }; static struct ffa_drv_info *drv_info; @@ -626,6 +632,8 @@ static int ffa_notification_bitmap_destroy(void) #define MAX_IDS_64 20 #define MAX_IDS_32 10 +#define PER_VCPU_NOTIFICATION_FLAG BIT(0) + static int ffa_notification_bind_common(u16 dst_id, u64 bitmap, u32 flags, bool is_bind) { @@ -865,6 +873,21 @@ static int ffa_memory_lend(struct ffa_mem_ops_args *args) return ffa_memory_ops(FFA_MEM_LEND, args); } +#define FFA_SECURE_PARTITION_ID_FLAG BIT(15) + +enum notify_type { + NON_SECURE_VM, + SECURE_PARTITION, + FRAMEWORK, +}; + +struct notifier_cb_info { + struct hlist_node hnode; + ffa_notifier_cb cb; + void *cb_data; + enum notify_type type; +}; + static int ffa_sched_recv_cb_update(u16 part_id, ffa_sched_recv_cb callback, void *cb_data, bool is_registration) { @@ -898,6 +921,123 @@ static int ffa_sched_recv_cb_unregister(struct ffa_device *dev) return ffa_sched_recv_cb_update(dev->vm_id, NULL, NULL, false); } +static int 
ffa_notification_bind(u16 dst_id, u64 bitmap, u32 flags) +{ + return ffa_notification_bind_common(dst_id, bitmap, flags, true); +} + +static int ffa_notification_unbind(u16 dst_id, u64 bitmap) +{ + return ffa_notification_bind_common(dst_id, bitmap, 0, false); +} + +/* Should be called while the notify_lock is taken */ +static struct notifier_cb_info * +notifier_hash_node_get(u16 notify_id, enum notify_type type) +{ + struct notifier_cb_info *node; + + hash_for_each_possible(drv_info->notifier_hash, node, hnode, notify_id) + if (type == node->type) + return node; + + return NULL; +} + +static int +update_notifier_cb(int notify_id, enum notify_type type, ffa_notifier_cb cb, + void *cb_data, bool is_registration) +{ + struct notifier_cb_info *cb_info = NULL; + bool cb_found; + + cb_info = notifier_hash_node_get(notify_id, type); + cb_found = !!cb_info; + + if (!(is_registration ^ cb_found)) + return -EINVAL; + + if (is_registration) { + cb_info = kzalloc(sizeof(*cb_info), GFP_KERNEL); + if (!cb_info) + return -ENOMEM; + + cb_info->type = type; + cb_info->cb = cb; + cb_info->cb_data = cb_data; + + hash_add(drv_info->notifier_hash, &cb_info->hnode, notify_id); + } else { + hash_del(&cb_info->hnode); + } + + return 0; +} + +static enum notify_type ffa_notify_type_get(u16 vm_id) +{ + if (vm_id & FFA_SECURE_PARTITION_ID_FLAG) + return SECURE_PARTITION; + else + return NON_SECURE_VM; +} + +static int ffa_notify_relinquish(struct ffa_device *dev, int notify_id) +{ + int rc; + enum notify_type type = ffa_notify_type_get(dev->vm_id); + + if (notify_id >= FFA_MAX_NOTIFICATIONS) + return -EINVAL; + + mutex_lock(&drv_info->notify_lock); + + rc = update_notifier_cb(notify_id, type, NULL, NULL, false); + if (rc) { + pr_err("Could not unregister notification callback\n"); + mutex_unlock(&drv_info->notify_lock); + return rc; + } + + rc = ffa_notification_unbind(dev->vm_id, BIT(notify_id)); + + mutex_unlock(&drv_info->notify_lock); + + return rc; +} + +static int 
ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu, + ffa_notifier_cb cb, void *cb_data, int notify_id) +{ + int rc; + u32 flags = 0; + enum notify_type type = ffa_notify_type_get(dev->vm_id); + + if (notify_id >= FFA_MAX_NOTIFICATIONS) + return -EINVAL; + + mutex_lock(&drv_info->notify_lock); + + if (is_per_vcpu) + flags = PER_VCPU_NOTIFICATION_FLAG; + + rc = ffa_notification_bind(dev->vm_id, BIT(notify_id), flags); + if (rc) { + mutex_unlock(&drv_info->notify_lock); + return rc; + } + + rc = update_notifier_cb(notify_id, type, cb, cb_data, true); + if (rc) { + pr_err("Failed to register callback for %d - %d\n", + notify_id, rc); + ffa_notification_unbind(dev->vm_id, BIT(notify_id)); + } + mutex_unlock(&drv_info->notify_lock); + + return rc; +} + static const struct ffa_info_ops ffa_drv_info_ops = { .api_version_get = ffa_api_version_get, .partition_info_get = ffa_partition_info_get, @@ -921,6 +1061,8 @@ static const struct ffa_cpu_ops ffa_drv_cpu_ops = { static const struct ffa_notifier_ops ffa_drv_notifier_ops = { .sched_recv_cb_register = ffa_sched_recv_cb_register, .sched_recv_cb_unregister = ffa_sched_recv_cb_unregister, + .notify_request = ffa_notify_request, + .notify_relinquish = ffa_notify_relinquish, }; static const struct ffa_ops ffa_drv_ops = { @@ -1194,6 +1336,9 @@ static int ffa_notifications_setup(void) if (ret) goto cleanup; + hash_init(drv_info->notifier_hash); + mutex_init(&drv_info->notify_lock); + return 0; cleanup: ffa_notifications_cleanup(); diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index f9cf6114ef82..fb6f388a3737 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -392,10 +392,15 @@ struct ffa_cpu_ops { }; typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data); +typedef void (*ffa_notifier_cb)(int notify_id, void *cb_data); + struct ffa_notifier_ops { int (*sched_recv_cb_register)(struct ffa_device *dev, ffa_sched_recv_cb cb, void *cb_data); int 
(*sched_recv_cb_unregister)(struct ffa_device *dev); + int (*notify_request)(struct ffa_device *dev, bool per_vcpu, + ffa_notifier_cb cb, void *cb_data, int notify_id); + int (*notify_relinquish)(struct ffa_device *dev, int notify_id); }; struct ffa_ops { -- cgit v1.2.3 From e5adb3b20e39dbf18651322a9bc24eb55050188f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:04 +0100 Subject: firmware: arm_ffa: Add interface to send a notification to a given partition The framework provides an interface to the sender endpoint to specify the notification to signal to the receiver endpoint. A sender signals a notification by requesting its partition manager to set the corresponding bit in the notifications bitmap of the receiver. Expose the ability to send a notification to another partition. Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-11-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 13 +++++++++++++ include/linux/arm_ffa.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 1c3b34eb06e4..590005234073 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1038,6 +1038,18 @@ static int ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu, return rc; } +static int ffa_notify_send(struct ffa_device *dev, int notify_id, + bool is_per_vcpu, u16 vcpu) +{ + u32 flags = 0; + + if (is_per_vcpu) + flags |= (PER_VCPU_NOTIFICATION_FLAG | vcpu << 16); + + return ffa_notification_set(dev->vm_id, drv_info->vm_id, flags, + BIT(notify_id)); +} + static const struct ffa_info_ops ffa_drv_info_ops = { .api_version_get = ffa_api_version_get, .partition_info_get = ffa_partition_info_get, @@ -1063,6 +1075,7 @@ static const struct ffa_notifier_ops ffa_drv_notifier_ops = { .sched_recv_cb_unregister = ffa_sched_recv_cb_unregister, .notify_request = ffa_notify_request, 
.notify_relinquish = ffa_notify_relinquish, + .notify_send = ffa_notify_send, }; static const struct ffa_ops ffa_drv_ops = { diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index fb6f388a3737..f6df81f14b6d 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -401,6 +401,8 @@ struct ffa_notifier_ops { int (*notify_request)(struct ffa_device *dev, bool per_vcpu, ffa_notifier_cb cb, void *cb_data, int notify_id); int (*notify_relinquish)(struct ffa_device *dev, int notify_id); + int (*notify_send)(struct ffa_device *dev, int notify_id, bool per_vcpu, + u16 vcpu); }; struct ffa_ops { -- cgit v1.2.3 From c9b21ef0d0a87695d7bfeee9a04b89760b49ccf5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:06 +0100 Subject: firmware: arm_ffa: Simplify the computation of transmit and fragment length The computation of the endpoint memory access descriptor's composite memory region descriptor offset is using COMPOSITE_CONSTITUENTS_OFFSET which is unnecessarily complicated. The composite memory region descriptor always follows the endpoint memory access descriptor array and hence it is computed accordingly. COMPOSITE_CONSTITUENTS_OFFSET is useless and wrong for any input other than the endpoint memory access descriptor count. Let us drop the usage of COMPOSITE_CONSTITUENTS_OFFSET to simplify the computation of total transmit and fragment length in the memory transactions.
Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-13-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 14 ++++++++------ include/linux/arm_ffa.h | 2 -- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 6f688b4a4620..49fcbeb63eaa 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -442,23 +442,25 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, { int rc = 0; bool first = true; + u32 composite_offset; phys_addr_t addr = 0; + struct ffa_mem_region *mem_region = buffer; struct ffa_composite_mem_region *composite; struct ffa_mem_region_addr_range *constituents; struct ffa_mem_region_attributes *ep_mem_access; - struct ffa_mem_region *mem_region = buffer; u32 idx, frag_len, length, buf_sz = 0, num_entries = sg_nents(args->sg); mem_region->tag = args->tag; mem_region->flags = args->flags; mem_region->sender_id = drv_info->vm_id; mem_region->attributes = ffa_memory_attributes_get(func_id); - ep_mem_access = &mem_region->ep_mem_access[0]; + ep_mem_access = buffer + COMPOSITE_OFFSET(0); + composite_offset = COMPOSITE_OFFSET(args->nattrs); for (idx = 0; idx < args->nattrs; idx++, ep_mem_access++) { ep_mem_access->receiver = args->attrs[idx].receiver; ep_mem_access->attrs = args->attrs[idx].attrs; - ep_mem_access->composite_off = COMPOSITE_OFFSET(args->nattrs); + ep_mem_access->composite_off = composite_offset; ep_mem_access->flag = 0; ep_mem_access->reserved = 0; } @@ -467,13 +469,13 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, mem_region->reserved_1 = 0; mem_region->ep_count = args->nattrs; - composite = buffer + COMPOSITE_OFFSET(args->nattrs); + composite = buffer + composite_offset; composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg); composite->addr_range_cnt = num_entries; composite->reserved = 0; - length = 
COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, num_entries); - frag_len = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, 0); + length = composite_offset + CONSTITUENTS_OFFSET(num_entries); + frag_len = composite_offset + CONSTITUENTS_OFFSET(0); if (frag_len > max_fragsize) return -ENXIO; diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index f6df81f14b6d..748d0a83a4bc 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -356,8 +356,6 @@ struct ffa_mem_region { (offsetof(struct ffa_mem_region, ep_mem_access[x])) #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) -#define COMPOSITE_CONSTITUENTS_OFFSET(x, y) \ - (COMPOSITE_OFFSET(x) + CONSTITUENTS_OFFSET(y)) struct ffa_mem_ops_args { bool use_txbuf; -- cgit v1.2.3 From bc5bc309db45a7ab218ce8259ba9bc7659be61ca Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Thu, 5 Oct 2023 08:41:23 +0000 Subject: bpf: Inherit system settings for CPU security mitigations Currently, there exists a system-wide setting related to CPU security mitigations, denoted as 'mitigations='. When set to 'mitigations=off', it deactivates all optional CPU mitigations. Therefore, if we implement a system-wide 'mitigations=off' setting, it should inherently bypass Spectre v1 and Spectre v4 in the BPF subsystem. Please note that there is also a more specific 'nospectre_v1' setting on x86 and ppc architectures, though it is not currently exported. For the time being, let's disregard more fine-grained options. This idea emerged during our discussion about potential Spectre v1 attacks with Luis [0]. 
[0] https://lore.kernel.org/bpf/b4fc15f7-b204-767e-ebb9-fdb4233961fb@iogearbox.net Signed-off-by: Yafang Shao Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Acked-by: Song Liu Acked-by: KP Singh Cc: Luis Gerhorst Link: https://lore.kernel.org/bpf/20231005084123.1338-1-laoar.shao@gmail.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a82efd34b741..61bde4520f5c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2164,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void) static inline bool bpf_bypass_spec_v1(void) { - return perfmon_capable(); + return perfmon_capable() || cpu_mitigations_off(); } static inline bool bpf_bypass_spec_v4(void) { - return perfmon_capable(); + return perfmon_capable() || cpu_mitigations_off(); } int bpf_map_new_fd(struct bpf_map *map, int flags); -- cgit v1.2.3 From e7a1b32e43b194bbf930281ae7f5149c420cd122 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Thu, 5 Oct 2023 15:41:20 +0200 Subject: cpufreq: Rebuild sched-domains when removing cpufreq driver The Energy Aware Scheduler (EAS) relies on the schedutil governor. When moving to/from the schedutil governor, sched domains must be rebuilt to allow re-evaluating the enablement conditions of EAS. This is done through sched_cpufreq_governor_change(). Having a cpufreq governor assumes a cpufreq driver is running. Inserting/removing a cpufreq driver should trigger a re-evaluation of EAS enablement conditions, so that EAS is not left enabled when a running cpufreq driver is removed. Rebuild the sched domains in schedutil's sugov_init()/sugov_exit(), allowing EAS's enablement conditions to be checked whenever the schedutil governor is initialized or exited. Move relevant code up in schedutil.c to avoid a split and conditional function declaration. Rename sched_cpufreq_governor_change() to sugov_eas_rebuild_sd().
Signed-off-by: Pierre Gondois Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 +-- include/linux/cpufreq.h | 8 ------ kernel/sched/cpufreq_schedutil.c | 55 +++++++++++++++++++++------------------- 3 files changed, 30 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 60ed89000e82..4bc15634d49c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1544,7 +1544,7 @@ static int cpufreq_online(unsigned int cpu) /* * Register with the energy model before - * sched_cpufreq_governor_change() is called, which will result + * sugov_eas_rebuild_sd() is called, which will result * in rebuilding of the sched domains, which should only be done * once the energy model is properly initialized for the policy * first. @@ -2652,7 +2652,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, ret = cpufreq_start_governor(policy); if (!ret) { pr_debug("governor change\n"); - sched_cpufreq_governor_change(policy, old_gov); return 0; } cpufreq_exit_governor(policy); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 71d186d6933a..1c5ca92a0555 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1193,14 +1193,6 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ } #endif -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov); -#else -static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov) { } -#endif - extern unsigned int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index f3a95def49cc..492ec650d48f 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -555,6 
+555,31 @@ static const struct kobj_type sugov_tunables_ktype = { /********************** cpufreq governor interface *********************/ +#ifdef CONFIG_ENERGY_MODEL +static void rebuild_sd_workfn(struct work_struct *work) +{ + rebuild_sched_domains_energy(); +} + +static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); + +/* + * EAS shouldn't be attempted without sugov, so rebuild the sched_domains + * on governor changes to make sure the scheduler knows about it. + */ +static void sugov_eas_rebuild_sd(void) +{ + /* + * When called from the cpufreq_register_driver() path, the + * cpu_hotplug_lock is already held, so use a work item to + * avoid nested locking in rebuild_sched_domains(). + */ + schedule_work(&rebuild_sd_work); +} +#else +static inline void sugov_eas_rebuild_sd(void) { }; +#endif + struct cpufreq_governor schedutil_gov; static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) @@ -709,6 +734,8 @@ static int sugov_init(struct cpufreq_policy *policy) if (ret) goto fail; + sugov_eas_rebuild_sd(); + out: mutex_unlock(&global_tunables_lock); return 0; @@ -750,6 +777,8 @@ static void sugov_exit(struct cpufreq_policy *policy) sugov_kthread_stop(sg_policy); sugov_policy_free(sg_policy); cpufreq_disable_fast_switch(policy); + + sugov_eas_rebuild_sd(); } static int sugov_start(struct cpufreq_policy *policy) @@ -833,29 +862,3 @@ struct cpufreq_governor *cpufreq_default_governor(void) #endif cpufreq_governor_init(schedutil_gov); - -#ifdef CONFIG_ENERGY_MODEL -static void rebuild_sd_workfn(struct work_struct *work) -{ - rebuild_sched_domains_energy(); -} -static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); - -/* - * EAS shouldn't be attempted without sugov, so rebuild the sched_domains - * on governor changes to make sure the scheduler knows about it. 
- */ -void sched_cpufreq_governor_change(struct cpufreq_policy *policy, - struct cpufreq_governor *old_gov) -{ - if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) { - /* - * When called from the cpufreq_register_driver() path, the - * cpu_hotplug_lock is already held, so use a work item to - * avoid nested locking in rebuild_sched_domains(). - */ - schedule_work(&rebuild_sd_work); - } - -} -#endif -- cgit v1.2.3 From 51a23b1be92046f0cc52384d30cf060700f1a54e Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 2 Aug 2023 17:28:56 +0800 Subject: acpi,mm: fix typo sibiling -> sibling First found this typo as reviewing memory tier code. Fix it by sed like: $ sed -i 's/sibiling/sibling/g' $(git grep -l sibiling) so the acpi one will be corrected as well. Link: https://lkml.kernel.org/r/20230802092856.819328-1-lizhijian@cn.fujitsu.com Signed-off-by: Li Zhijian Cc: Aneesh Kumar K.V Cc: Huang, Ying Cc: Len Brown Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton --- drivers/acpi/acpi_pad.c | 2 +- include/linux/memory-tiers.h | 2 +- mm/memory-tiers.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 7a453c5ff303..7f073ca64f0e 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -100,7 +100,7 @@ static void round_robin_cpu(unsigned int tsk_index) for_each_cpu(cpu, pad_busy_cpus) cpumask_or(tmp, tmp, topology_sibling_cpumask(cpu)); cpumask_andnot(tmp, cpu_online_mask, tmp); - /* avoid HT sibilings if possible */ + /* avoid HT siblings if possible */ if (cpumask_empty(tmp)) cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus); if (cpumask_empty(tmp)) { diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 437441cdf78f..4fa178b50784 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -22,7 +22,7 @@ struct memory_tier; struct memory_dev_type { /* list of memory types that are 
part of same tier as this type */ - struct list_head tier_sibiling; + struct list_head tier_sibling; /* abstract distance for this specific memory type */ int adistance; /* Nodes of same abstract distance */ diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 37a4f59d9585..876d8a5e210e 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -115,7 +115,7 @@ static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memti nodemask_t nodes = NODE_MASK_NONE; struct memory_dev_type *memtype; - list_for_each_entry(memtype, &memtier->memory_types, tier_sibiling) + list_for_each_entry(memtype, &memtier->memory_types, tier_sibling) nodes_or(nodes, nodes, memtype->nodes); return nodes; @@ -174,7 +174,7 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty * If the memtype is already part of a memory tier, * just return that. */ - if (!list_empty(&memtype->tier_sibiling)) { + if (!list_empty(&memtype->tier_sibling)) { list_for_each_entry(memtier, &memory_tiers, list) { if (adistance == memtier->adistance_start) return memtier; @@ -218,7 +218,7 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty memtier = new_memtier; link_memtype: - list_add(&memtype->tier_sibiling, &memtier->memory_types); + list_add(&memtype->tier_sibling, &memtier->memory_types); return memtier; } @@ -527,7 +527,7 @@ static bool clear_node_memory_tier(int node) memtype = node_memory_types[node].memtype; node_clear(node, memtype->nodes); if (nodes_empty(memtype->nodes)) { - list_del_init(&memtype->tier_sibiling); + list_del_init(&memtype->tier_sibling); if (list_empty(&memtier->memory_types)) destroy_memory_tier(memtier); } @@ -553,7 +553,7 @@ struct memory_dev_type *alloc_memory_type(int adistance) return ERR_PTR(-ENOMEM); memtype->adistance = adistance; - INIT_LIST_HEAD(&memtype->tier_sibiling); + INIT_LIST_HEAD(&memtype->tier_sibling); memtype->nodes = NODE_MASK_NONE; kref_init(&memtype->kref); return memtype; -- cgit 
v1.2.3 From 55d2a0bd5eadaade850efa9d3a7ffbb0aeb67198 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 18 Sep 2023 14:31:42 +0800 Subject: mm: add statistics for PUD level pagetable Recently, we found that cross-die access to pagetable pages on ARM64 machines can cause performance fluctuations in our business. Currently, there are no PMU events available to track this situation on our ARM64 machines, so accurate pagetable accounting can help to analyze this issue, but now the PUD level pagetable accounting is missed. So introduce pagetable_pud_ctor/dtor() to help to get accurate PUD pagetable accounting, as well as converting the architectures which use generic PUD pagetable allocation to add corresponding PUD pagetable accounting. Moreover this patch will mark the PUD level pagetable with PG_table flag, which will help to do sanity validation in unpoison_memory(). On my testing machine, I can see more pagetables statistics after the patch with page-types tool: Before patch: flags page-count MB symbolic-flags long-symbolic-flags 0x0000000004000000 27326 106 __________________________g_________________ pgtable After patch: 0x0000000004000000 27541 107 __________________________g_________________ pgtable Link: https://lkml.kernel.org/r/876c71c03a7e69c17722a690e3225a4f7b172fb2.1695017383.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: Mike Rapoport (IBM) Acked-by: Vishal Moola (Oracle) Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Matthew Wilcox (Oracle) Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm64/include/asm/tlb.h | 5 ++++- arch/loongarch/include/asm/pgalloc.h | 1 + arch/mips/include/asm/pgalloc.h | 1 + arch/x86/mm/pgtable.c | 3 +++ include/asm-generic/pgalloc.h | 7 ++++++- include/linux/mm.h | 16 ++++++++++++++++ 6 files changed, 31 
insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 2c29239d05c3..846c563689a8 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -96,7 +96,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - tlb_remove_ptdesc(tlb, virt_to_ptdesc(pudp)); + struct ptdesc *ptdesc = virt_to_ptdesc(pudp); + + pagetable_pud_dtor(ptdesc); + tlb_remove_ptdesc(tlb, ptdesc); } #endif diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 79470f0b4f1d..4e2d6b7ca2ee 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -84,6 +84,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) if (!ptdesc) return NULL; + pagetable_pud_ctor(ptdesc); pud = ptdesc_address(ptdesc); pud_init(pud); diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 40e40a7eb94a..f4440edcd8fe 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -95,6 +95,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) if (!ptdesc) return NULL; + pagetable_pud_ctor(ptdesc); pud = ptdesc_address(ptdesc); pud_init(pud); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 9deadf517f14..0cbc1b8e8e3d 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -76,6 +76,9 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { + struct ptdesc *ptdesc = virt_to_ptdesc(pud); + + pagetable_pud_dtor(ptdesc); paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); paravirt_tlb_remove_table(tlb, virt_to_page(pud)); } diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 
c75d4a753849..879e5f8aa5e9 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -169,6 +169,8 @@ static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) ptdesc = pagetable_alloc(gfp, 0); if (!ptdesc) return NULL; + + pagetable_pud_ctor(ptdesc); return ptdesc_address(ptdesc); } @@ -190,8 +192,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) static inline void __pud_free(struct mm_struct *mm, pud_t *pud) { + struct ptdesc *ptdesc = virt_to_ptdesc(pud); + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); - pagetable_free(virt_to_ptdesc(pud)); + pagetable_pud_dtor(ptdesc); + pagetable_free(ptdesc); } #ifndef __HAVE_ARCH_PUD_FREE diff --git a/include/linux/mm.h b/include/linux/mm.h index 31dc25d3f6b5..126b54b45442 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3049,6 +3049,22 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) return ptl; } +static inline void pagetable_pud_ctor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_set_pgtable(folio); + lruvec_stat_add_folio(folio, NR_PAGETABLE); +} + +static inline void pagetable_pud_dtor(struct ptdesc *ptdesc) +{ + struct folio *folio = ptdesc_folio(ptdesc); + + __folio_clear_pgtable(folio); + lruvec_stat_sub_folio(folio, NR_PAGETABLE); +} + extern void __init pagecache_init(void); extern void free_initmem(void); -- cgit v1.2.3 From 7ced098fcfe596feab3cea4f40128b0119c7bf1a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 Sep 2023 22:18:32 +0200 Subject: mm: document mmu_notifier_invalidate_range_start_nonblock() Document what mmu_notifier_invalidate_range_start_nonblock() is for. Also add a __must_check annotation to signal that callers must bail out if a notifier vetoes the operation. 
Link: https://lkml.kernel.org/r/20230918201832.265108-1-jannh@google.com Signed-off-by: Jann Horn Reviewed-by: Jason Gunthorpe Reviewed-by: Alistair Popple Signed-off-by: Andrew Morton --- include/linux/mmu_notifier.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 6e3c857606f1..f349e08a9dfe 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -459,7 +459,14 @@ mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) lock_map_release(&__mmu_notifier_invalidate_range_start_map); } -static inline int +/* + * This version of mmu_notifier_invalidate_range_start() avoids blocking, but it + * can return an error if a notifier can't proceed without blocking, in which + * case you're not allowed to modify PTEs in the specified range. + * + * This is mainly intended for OOM handling. + */ +static inline int __must_check mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { int ret = 0; -- cgit v1.2.3 From 24e41bf8a6b424c76c5902fb999e9eca61bdf83d Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 28 Aug 2023 17:08:57 +0200 Subject: mm: add a NO_INHERIT flag to the PR_SET_MDWE prctl This extends the current PR_SET_MDWE prctl arg with a bit to indicate that the process doesn't want MDWE protection to propagate to children. To implement this no-inherit mode, the tag in current->mm->flags must be absent from MMF_INIT_MASK. This means that the encoding for "MDWE but without inherit" is different in the prctl than in the mm flags. This leads to a bit of bit-mangling in the prctl implementation. 
Link: https://lkml.kernel.org/r/20230828150858.393570-6-revest@chromium.org Signed-off-by: Florent Revest Reviewed-by: Kees Cook Reviewed-by: Catalin Marinas Cc: Alexey Izbyshev Cc: Anshuman Khandual Cc: Ayush Jain Cc: David Hildenbrand Cc: Greg Thelen Cc: Joey Gouly Cc: KP Singh Cc: Mark Brown Cc: Michal Hocko Cc: Peter Xu Cc: Ryan Roberts Cc: Szabolcs Nagy Cc: Topi Miettinen Signed-off-by: Andrew Morton --- include/linux/sched/coredump.h | 10 ++++++++++ include/uapi/linux/prctl.h | 1 + kernel/fork.c | 2 +- kernel/sys.c | 32 ++++++++++++++++++++++++++------ tools/include/uapi/linux/prctl.h | 1 + 5 files changed, 39 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 0ee96ea7a0e9..1b37fa8fc723 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -91,4 +91,14 @@ static inline int get_dumpable(struct mm_struct *mm) MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) #define MMF_VM_MERGE_ANY 29 +#define MMF_HAS_MDWE_NO_INHERIT 30 + +static inline unsigned long mmf_init_flags(unsigned long flags) +{ + if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) + flags &= ~((1UL << MMF_HAS_MDWE) | + (1UL << MMF_HAS_MDWE_NO_INHERIT)); + return flags & MMF_INIT_MASK; +} + #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 9a85c69782bd..370ed14b1ae0 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -284,6 +284,7 @@ struct prctl_mm_map { /* Memory deny write / execute */ #define PR_SET_MDWE 65 # define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0) +# define PR_MDWE_NO_INHERIT (1UL << 1) #define PR_GET_MDWE 66 diff --git a/kernel/fork.c b/kernel/fork.c index 1779183a7cb3..e45a4457ba83 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1288,7 +1288,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, hugetlb_count_init(mm); if (current->mm) { - mm->flags = current->mm->flags 
& MMF_INIT_MASK; + mm->flags = mmf_init_flags(current->mm->flags); mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; } else { mm->flags = default_dump_filter; diff --git a/kernel/sys.c b/kernel/sys.c index 2410e3999ebe..4a8073c1b255 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2368,19 +2368,41 @@ static int prctl_set_vma(unsigned long opt, unsigned long start, } #endif /* CONFIG_ANON_VMA_NAME */ +static inline unsigned long get_current_mdwe(void) +{ + unsigned long ret = 0; + + if (test_bit(MMF_HAS_MDWE, &current->mm->flags)) + ret |= PR_MDWE_REFUSE_EXEC_GAIN; + if (test_bit(MMF_HAS_MDWE_NO_INHERIT, &current->mm->flags)) + ret |= PR_MDWE_NO_INHERIT; + + return ret; +} + static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, unsigned long arg4, unsigned long arg5) { + unsigned long current_bits; + if (arg3 || arg4 || arg5) return -EINVAL; - if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN)) + if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT)) + return -EINVAL; + + /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */ + if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) return -EINVAL; + current_bits = get_current_mdwe(); + if (current_bits && current_bits != bits) + return -EPERM; /* Cannot unset the flags */ + + if (bits & PR_MDWE_NO_INHERIT) + set_bit(MMF_HAS_MDWE_NO_INHERIT, &current->mm->flags); if (bits & PR_MDWE_REFUSE_EXEC_GAIN) set_bit(MMF_HAS_MDWE, &current->mm->flags); - else if (test_bit(MMF_HAS_MDWE, &current->mm->flags)) - return -EPERM; /* Cannot unset the flag */ return 0; } @@ -2390,9 +2412,7 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3, { if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - - return test_bit(MMF_HAS_MDWE, &current->mm->flags) ?
- PR_MDWE_REFUSE_EXEC_GAIN : 0; + return get_current_mdwe(); } static int prctl_get_auxv(void __user *addr, unsigned long len) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 9a85c69782bd..370ed14b1ae0 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -284,6 +284,7 @@ struct prctl_mm_map { /* Memory deny write / execute */ #define PR_SET_MDWE 65 # define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0) +# define PR_MDWE_NO_INHERIT (1UL << 1) #define PR_GET_MDWE 66 -- cgit v1.2.3 From 7e845ecb2fbfa1bf800e703df29ee2e06592c2a0 Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Wed, 30 Aug 2023 19:15:28 +0800 Subject: PCI: Add pci_is_vga() helper The PCI Code and ID Assignment spec, r1.15, secs 1.4 and 1.1, define VGA Base Class and Sub-Classes: 03 00 PCI_CLASS_DISPLAY_VGA VGA-compatible or 8514-compatible 00 01 PCI_CLASS_NOT_DEFINED_VGA VGA-compatible (before Class Code) Add a pci_is_vga() helper to return true if a device is in either category. These VGA devices use the hardwired legacy VGA resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], [io 0x3c0-0x3df] and aliases), so they require special handling if more than one is present in the system. Link: https://lore.kernel.org/r/20230830111532.444535-2-sui.jingfeng@linux.dev Signed-off-by: Sui Jingfeng [bhelgaas: commit log, drop !pdev test] Signed-off-by: Bjorn Helgaas Cc: "Maciej W. 
Rozycki" --- include/linux/pci.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 8c7c2c3c6c65..7bab234391cb 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -713,6 +713,30 @@ static inline bool pci_is_bridge(struct pci_dev *dev) dev->hdr_type == PCI_HEADER_TYPE_CARDBUS; } +/** + * pci_is_vga - check if the PCI device is a VGA device + * + * The PCI Code and ID Assignment spec, r1.15, secs 1.4 and 1.1, define + * VGA Base Class and Sub-Classes: + * + * 03 00 PCI_CLASS_DISPLAY_VGA VGA-compatible or 8514-compatible + * 00 01 PCI_CLASS_NOT_DEFINED_VGA VGA-compatible (before Class Code) + * + * Return true if the PCI device is a VGA device and uses the legacy VGA + * resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], [io 0x3c0-0x3df] and + * aliases). + */ +static inline bool pci_is_vga(struct pci_dev *pdev) +{ + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + return true; + + if ((pdev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA) + return true; + + return false; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else -- cgit v1.2.3 From 3abbd0699b678fc48e0100704338cff9180fe4bb Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Thu, 5 Oct 2023 20:29:15 +0200 Subject: net: phy: broadcom: add support for BCM5221 phy This patch adds the BCM5221 PHY support by reusing brcm_fet_*() callbacks and adding quirks for BCM5221 when needed. 
Cc: Jim Reinhart Cc: James Autry Cc: Matthew Maron Signed-off-by: Giulio Benetti Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231005182915.153815-1-giulio.benetti@benettiengineering.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/broadcom.c | 154 +++++++++++++++++++++++++++++++++++---------- include/linux/brcmphy.h | 10 +++ 2 files changed, 131 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 04b2e6eeb195..3a627105675a 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -704,16 +704,21 @@ static int brcm_fet_config_init(struct phy_device *phydev) if (err < 0 && err != -EIO) return err; + /* Read to clear status bits */ reg = phy_read(phydev, MII_BRCM_FET_INTREG); if (reg < 0) return reg; /* Unmask events we are interested in and mask interrupts globally. */ - reg = MII_BRCM_FET_IR_DUPLEX_EN | - MII_BRCM_FET_IR_SPEED_EN | - MII_BRCM_FET_IR_LINK_EN | - MII_BRCM_FET_IR_ENABLE | - MII_BRCM_FET_IR_MASK; + if (phydev->phy_id == PHY_ID_BCM5221) + reg = MII_BRCM_FET_IR_ENABLE | + MII_BRCM_FET_IR_MASK; + else + reg = MII_BRCM_FET_IR_DUPLEX_EN | + MII_BRCM_FET_IR_SPEED_EN | + MII_BRCM_FET_IR_LINK_EN | + MII_BRCM_FET_IR_ENABLE | + MII_BRCM_FET_IR_MASK; err = phy_write(phydev, MII_BRCM_FET_INTREG, reg); if (err < 0) @@ -726,42 +731,49 @@ static int brcm_fet_config_init(struct phy_device *phydev) reg = brcmtest | MII_BRCM_FET_BT_SRE; - err = phy_write(phydev, MII_BRCM_FET_BRCMTEST, reg); - if (err < 0) - return err; + phy_lock_mdio_bus(phydev); - /* Set the LED mode */ - reg = phy_read(phydev, MII_BRCM_FET_SHDW_AUXMODE4); - if (reg < 0) { - err = reg; - goto done; + err = __phy_write(phydev, MII_BRCM_FET_BRCMTEST, reg); + if (err < 0) { + phy_unlock_mdio_bus(phydev); + return err; } - reg &= ~MII_BRCM_FET_SHDW_AM4_LED_MASK; - reg |= MII_BRCM_FET_SHDW_AM4_LED_MODE1; + if (phydev->phy_id != PHY_ID_BCM5221) { + /* Set the LED mode */ + reg = 
__phy_read(phydev, MII_BRCM_FET_SHDW_AUXMODE4); + if (reg < 0) { + err = reg; + goto done; + } - err = phy_write(phydev, MII_BRCM_FET_SHDW_AUXMODE4, reg); - if (err < 0) - goto done; + err = __phy_modify(phydev, MII_BRCM_FET_SHDW_AUXMODE4, + MII_BRCM_FET_SHDW_AM4_LED_MASK, + MII_BRCM_FET_SHDW_AM4_LED_MODE1); + if (err < 0) + goto done; - /* Enable auto MDIX */ - err = phy_set_bits(phydev, MII_BRCM_FET_SHDW_MISCCTRL, - MII_BRCM_FET_SHDW_MC_FAME); - if (err < 0) - goto done; + /* Enable auto MDIX */ + err = __phy_set_bits(phydev, MII_BRCM_FET_SHDW_MISCCTRL, + MII_BRCM_FET_SHDW_MC_FAME); + if (err < 0) + goto done; + } if (phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE) { /* Enable auto power down */ - err = phy_set_bits(phydev, MII_BRCM_FET_SHDW_AUXSTAT2, - MII_BRCM_FET_SHDW_AS2_APDE); + err = __phy_set_bits(phydev, MII_BRCM_FET_SHDW_AUXSTAT2, + MII_BRCM_FET_SHDW_AS2_APDE); } done: /* Disable shadow register access */ - err2 = phy_write(phydev, MII_BRCM_FET_BRCMTEST, brcmtest); + err2 = __phy_write(phydev, MII_BRCM_FET_BRCMTEST, brcmtest); if (!err) err = err2; + phy_unlock_mdio_bus(phydev); + return err; } @@ -840,23 +852,86 @@ static int brcm_fet_suspend(struct phy_device *phydev) reg = brcmtest | MII_BRCM_FET_BT_SRE; - err = phy_write(phydev, MII_BRCM_FET_BRCMTEST, reg); - if (err < 0) + phy_lock_mdio_bus(phydev); + + err = __phy_write(phydev, MII_BRCM_FET_BRCMTEST, reg); + if (err < 0) { + phy_unlock_mdio_bus(phydev); return err; + } + + if (phydev->phy_id == PHY_ID_BCM5221) + /* Force Low Power Mode with clock enabled */ + reg = BCM5221_SHDW_AM4_EN_CLK_LPM | BCM5221_SHDW_AM4_FORCE_LPM; + else + /* Set standby mode */ + reg = MII_BRCM_FET_SHDW_AM4_STANDBY; - /* Set standby mode */ - err = phy_modify(phydev, MII_BRCM_FET_SHDW_AUXMODE4, - MII_BRCM_FET_SHDW_AM4_STANDBY, - MII_BRCM_FET_SHDW_AM4_STANDBY); + err = __phy_set_bits(phydev, MII_BRCM_FET_SHDW_AUXMODE4, reg); /* Disable shadow register access */ - err2 = phy_write(phydev, MII_BRCM_FET_BRCMTEST, brcmtest); + 
err2 = __phy_write(phydev, MII_BRCM_FET_BRCMTEST, brcmtest); if (!err) err = err2; + phy_unlock_mdio_bus(phydev); + return err; } +static int bcm5221_config_aneg(struct phy_device *phydev) +{ + int ret, val; + + ret = genphy_config_aneg(phydev); + if (ret) + return ret; + + switch (phydev->mdix_ctrl) { + case ETH_TP_MDI: + val = BCM5221_AEGSR_MDIX_DIS; + break; + case ETH_TP_MDI_X: + val = BCM5221_AEGSR_MDIX_DIS | BCM5221_AEGSR_MDIX_MAN_SWAP; + break; + case ETH_TP_MDI_AUTO: + val = 0; + break; + default: + return 0; + } + + return phy_modify(phydev, BCM5221_AEGSR, BCM5221_AEGSR_MDIX_MAN_SWAP | + BCM5221_AEGSR_MDIX_DIS, + val); +} + +static int bcm5221_read_status(struct phy_device *phydev) +{ + int ret; + + /* Read MDIX status */ + ret = phy_read(phydev, BCM5221_AEGSR); + if (ret < 0) + return ret; + + if (ret & BCM5221_AEGSR_MDIX_DIS) { + if (ret & BCM5221_AEGSR_MDIX_MAN_SWAP) + phydev->mdix_ctrl = ETH_TP_MDI_X; + else + phydev->mdix_ctrl = ETH_TP_MDI; + } else { + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + } + + if (ret & BCM5221_AEGSR_MDIX_STATUS) + phydev->mdix = ETH_TP_MDI_X; + else + phydev->mdix = ETH_TP_MDI; + + return genphy_read_status(phydev); +} + static void bcm54xx_phy_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { @@ -1221,6 +1296,18 @@ static struct phy_driver broadcom_drivers[] = { .handle_interrupt = brcm_fet_handle_interrupt, .suspend = brcm_fet_suspend, .resume = brcm_fet_config_init, +}, { + .phy_id = PHY_ID_BCM5221, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM5221", + /* PHY_BASIC_FEATURES */ + .config_init = brcm_fet_config_init, + .config_intr = brcm_fet_config_intr, + .handle_interrupt = brcm_fet_handle_interrupt, + .suspend = brcm_fet_suspend, + .resume = brcm_fet_config_init, + .config_aneg = bcm5221_config_aneg, + .read_status = bcm5221_read_status, }, { .phy_id = PHY_ID_BCM5395, .phy_id_mask = 0xfffffff0, @@ -1296,6 +1383,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM50610M, 
0xfffffff0 }, { PHY_ID_BCM57780, 0xfffffff0 }, { PHY_ID_BCMAC131, 0xfffffff0 }, + { PHY_ID_BCM5221, 0xfffffff0 }, { PHY_ID_BCM5241, 0xfffffff0 }, { PHY_ID_BCM5395, 0xfffffff0 }, { PHY_ID_BCM53125, 0xfffffff0 }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index c55810a43541..1394ba302367 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -11,6 +11,7 @@ #define PHY_ID_BCM50610 0x0143bd60 #define PHY_ID_BCM50610M 0x0143bd70 +#define PHY_ID_BCM5221 0x004061e0 #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 @@ -331,6 +332,15 @@ #define BCM54XX_WOL_INT_STATUS (MII_BCM54XX_EXP_SEL_WOL + 0x94) +/* BCM5221 Registers */ +#define BCM5221_AEGSR 0x1C +#define BCM5221_AEGSR_MDIX_STATUS BIT(13) +#define BCM5221_AEGSR_MDIX_MAN_SWAP BIT(12) +#define BCM5221_AEGSR_MDIX_DIS BIT(11) + +#define BCM5221_SHDW_AM4_EN_CLK_LPM BIT(2) +#define BCM5221_SHDW_AM4_FORCE_LPM BIT(1) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ /*****************************************************************************/ -- cgit v1.2.3 From 77bbfe607b1d306c88bf96fed00c030f6bf462f1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 4 Oct 2023 07:42:23 +0800 Subject: firmware: arm_scmi: Add support for clock parents SCMI v3.2 spec introduces CLOCK_POSSIBLE_PARENTS_GET, CLOCK_PARENT_SET and CLOCK_PARENT_GET. Add support for these to enable clock parents and use them in the clock driver. 
Reviewed-by: Cristian Marussi Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20231004-scmi-clock-v3-v5-1-1b8a1435673e@nxp.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/clock.c | 181 ++++++++++++++++++++++++++++++++++++-- include/linux/scmi_protocol.h | 6 ++ 2 files changed, 181 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 9c0e33c1efab..42b81c181d68 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -22,6 +22,9 @@ enum scmi_clock_protocol_cmd { CLOCK_RATE_NOTIFY = 0x9, CLOCK_RATE_CHANGE_REQUESTED_NOTIFY = 0xA, CLOCK_CONFIG_GET = 0xB, + CLOCK_POSSIBLE_PARENTS_GET = 0xC, + CLOCK_PARENT_SET = 0xD, + CLOCK_PARENT_GET = 0xE, }; enum clk_state { @@ -42,10 +45,28 @@ struct scmi_msg_resp_clock_attributes { #define SUPPORTS_RATE_CHANGED_NOTIF(x) ((x) & BIT(31)) #define SUPPORTS_RATE_CHANGE_REQUESTED_NOTIF(x) ((x) & BIT(30)) #define SUPPORTS_EXTENDED_NAMES(x) ((x) & BIT(29)) +#define SUPPORTS_PARENT_CLOCK(x) ((x) & BIT(28)) u8 name[SCMI_SHORT_NAME_MAX_SIZE]; __le32 clock_enable_latency; }; +struct scmi_msg_clock_possible_parents { + __le32 id; + __le32 skip_parents; +}; + +struct scmi_msg_resp_clock_possible_parents { + __le32 num_parent_flags; +#define NUM_PARENTS_RETURNED(x) ((x) & 0xff) +#define NUM_PARENTS_REMAINING(x) ((x) >> 24) + __le32 possible_parents[]; +}; + +struct scmi_msg_clock_set_parent { + __le32 id; + __le32 parent_id; +}; + struct scmi_msg_clock_config_set { __le32 id; __le32 attributes; @@ -168,6 +189,98 @@ scmi_clock_protocol_attributes_get(const struct scmi_protocol_handle *ph, return ret; } +struct scmi_clk_ipriv { + struct device *dev; + u32 clk_id; + struct scmi_clock_info *clk; +}; + +static void iter_clk_possible_parents_prepare_message(void *message, unsigned int desc_index, + const void *priv) +{ + struct scmi_msg_clock_possible_parents *msg = message; + const struct scmi_clk_ipriv 
*p = priv; + + msg->id = cpu_to_le32(p->clk_id); + /* Set the number of OPPs to be skipped/already read */ + msg->skip_parents = cpu_to_le32(desc_index); +} + +static int iter_clk_possible_parents_update_state(struct scmi_iterator_state *st, + const void *response, void *priv) +{ + const struct scmi_msg_resp_clock_possible_parents *r = response; + struct scmi_clk_ipriv *p = priv; + struct device *dev = ((struct scmi_clk_ipriv *)p)->dev; + u32 flags; + + flags = le32_to_cpu(r->num_parent_flags); + st->num_returned = NUM_PARENTS_RETURNED(flags); + st->num_remaining = NUM_PARENTS_REMAINING(flags); + + /* + * num parents is not declared previously anywhere so we + * assume it's returned+remaining on first call. + */ + if (!st->max_resources) { + p->clk->num_parents = st->num_returned + st->num_remaining; + p->clk->parents = devm_kcalloc(dev, p->clk->num_parents, + sizeof(*p->clk->parents), + GFP_KERNEL); + if (!p->clk->parents) { + p->clk->num_parents = 0; + return -ENOMEM; + } + st->max_resources = st->num_returned + st->num_remaining; + } + + return 0; +} + +static int iter_clk_possible_parents_process_response(const struct scmi_protocol_handle *ph, + const void *response, + struct scmi_iterator_state *st, + void *priv) +{ + const struct scmi_msg_resp_clock_possible_parents *r = response; + struct scmi_clk_ipriv *p = priv; + + u32 *parent = &p->clk->parents[st->desc_index + st->loop_idx]; + + *parent = le32_to_cpu(r->possible_parents[st->loop_idx]); + + return 0; +} + +static int scmi_clock_possible_parents(const struct scmi_protocol_handle *ph, u32 clk_id, + struct scmi_clock_info *clk) +{ + struct scmi_iterator_ops ops = { + .prepare_message = iter_clk_possible_parents_prepare_message, + .update_state = iter_clk_possible_parents_update_state, + .process_response = iter_clk_possible_parents_process_response, + }; + + struct scmi_clk_ipriv ppriv = { + .clk_id = clk_id, + .clk = clk, + .dev = ph->dev, + }; + void *iter; + int ret; + + iter = 
ph->hops->iter_response_init(ph, &ops, 0, + CLOCK_POSSIBLE_PARENTS_GET, + sizeof(struct scmi_msg_clock_possible_parents), + &ppriv); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + ret = ph->hops->iter_response_run(iter); + + return ret; +} + static int scmi_clock_attributes_get(const struct scmi_protocol_handle *ph, u32 clk_id, struct scmi_clock_info *clk, u32 version) @@ -212,6 +325,8 @@ static int scmi_clock_attributes_get(const struct scmi_protocol_handle *ph, clk->rate_changed_notifications = true; if (SUPPORTS_RATE_CHANGE_REQUESTED_NOTIF(attributes)) clk->rate_change_requested_notifications = true; + if (SUPPORTS_PARENT_CLOCK(attributes)) + scmi_clock_possible_parents(ph, clk_id, clk); } return ret; @@ -229,12 +344,6 @@ static int rate_cmp_func(const void *_r1, const void *_r2) return 1; } -struct scmi_clk_ipriv { - struct device *dev; - u32 clk_id; - struct scmi_clock_info *clk; -}; - static void iter_clk_describe_prepare_message(void *message, const unsigned int desc_index, const void *priv) @@ -458,6 +567,64 @@ scmi_clock_config_set(const struct scmi_protocol_handle *ph, u32 clk_id, return ret; } +static int +scmi_clock_set_parent(const struct scmi_protocol_handle *ph, u32 clk_id, + u32 parent_id) +{ + int ret; + struct scmi_xfer *t; + struct scmi_msg_clock_set_parent *cfg; + struct clock_info *ci = ph->get_priv(ph); + struct scmi_clock_info *clk; + + if (clk_id >= ci->num_clocks) + return -EINVAL; + + clk = ci->clk + clk_id; + + if (parent_id >= clk->num_parents) + return -EINVAL; + + ret = ph->xops->xfer_get_init(ph, CLOCK_PARENT_SET, + sizeof(*cfg), 0, &t); + if (ret) + return ret; + + t->hdr.poll_completion = false; + + cfg = t->tx.buf; + cfg->id = cpu_to_le32(clk_id); + cfg->parent_id = cpu_to_le32(clk->parents[parent_id]); + + ret = ph->xops->do_xfer(ph, t); + + ph->xops->xfer_put(ph, t); + + return ret; +} + +static int +scmi_clock_get_parent(const struct scmi_protocol_handle *ph, u32 clk_id, + u32 *parent_id) +{ + int ret; + struct scmi_xfer *t; + 
+ ret = ph->xops->xfer_get_init(ph, CLOCK_PARENT_GET, + sizeof(__le32), sizeof(u32), &t); + if (ret) + return ret; + + put_unaligned_le32(clk_id, t->tx.buf); + + ret = ph->xops->do_xfer(ph, t); + if (!ret) + *parent_id = get_unaligned_le32(t->rx.buf); + + ph->xops->xfer_put(ph, t); + return ret; +} + /* For SCMI clock v2.1 and onwards */ static int scmi_clock_config_set_v2(const struct scmi_protocol_handle *ph, u32 clk_id, @@ -650,6 +817,8 @@ static const struct scmi_clk_proto_ops clk_proto_ops = { .state_get = scmi_clock_state_get, .config_oem_get = scmi_clock_config_oem_get, .config_oem_set = scmi_clock_config_oem_set, + .parent_set = scmi_clock_set_parent, + .parent_get = scmi_clock_get_parent, }; static int scmi_clk_rate_notify(const struct scmi_protocol_handle *ph, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 27bfa5a65b45..f2f05fb42d28 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -58,6 +58,8 @@ struct scmi_clock_info { u64 step_size; } range; }; + int num_parents; + u32 *parents; }; enum scmi_power_scale { @@ -83,6 +85,8 @@ struct scmi_protocol_handle; * @state_get: get the status of the specified clock * @config_oem_get: get the value of an OEM specific clock config * @config_oem_set: set the value of an OEM specific clock config + * @parent_get: get the parent id of a clk + * @parent_set: set the parent of a clock */ struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); @@ -104,6 +108,8 @@ struct scmi_clk_proto_ops { bool atomic); int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u8 oem_type, u32 oem_val, bool atomic); + int (*parent_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 *parent_id); + int (*parent_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 parent_id); }; struct scmi_perf_domain_info { -- cgit v1.2.3 From 76cf932c95b9e7c07b065b5c71e56957e2826ae2 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: 
Thu, 5 Oct 2023 15:45:07 +0100 Subject: KVM: arm64: FFA: Remove access of endpoint memory access descriptor array FF-A v1.1 removes the fixed location of endpoint memory access descriptor array within the memory transaction descriptor structure. In preparation to remove the ep_mem_access member from the ffa_mem_region structure, provide the accessor to fetch the offset and use the same in FF-A proxy implementation. The accessor takes the FF-A version as the argument from which the memory access descriptor format can be determined. v1.0 uses the old format while v1.1 onwards use the new format specified in the v1.1 specification. Cc: Oliver Upton Cc: Will Deacon Cc: Quentin Perret Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-14-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- arch/arm64/kvm/hyp/nvhe/ffa.c | 10 ++++++++-- include/linux/arm_ffa.h | 6 ++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index ab4f5d160c58..5c7b345c2cd5 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -423,6 +423,7 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id, DECLARE_REG(u32, fraglen, ctxt, 2); DECLARE_REG(u64, addr_mbz, ctxt, 3); DECLARE_REG(u32, npages_mbz, ctxt, 4); + struct ffa_mem_region_attributes *ep_mem_access; struct ffa_composite_mem_region *reg; struct ffa_mem_region *buf; u32 offset, nr_ranges; @@ -452,7 +453,9 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id, buf = hyp_buffers.tx; memcpy(buf, host_buffers.tx, fraglen); - offset = buf->ep_mem_access[0].composite_off; + ep_mem_access = (void *)buf + + ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0); + offset = ep_mem_access->composite_off; if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) { ret = FFA_RET_INVALID_PARAMETERS; goto out_unlock; @@ -504,6 +507,7 @@ static void do_ffa_mem_reclaim(struct
arm_smccc_res *res, DECLARE_REG(u32, handle_lo, ctxt, 1); DECLARE_REG(u32, handle_hi, ctxt, 2); DECLARE_REG(u32, flags, ctxt, 3); + struct ffa_mem_region_attributes *ep_mem_access; struct ffa_composite_mem_region *reg; u32 offset, len, fraglen, fragoff; struct ffa_mem_region *buf; @@ -528,7 +532,9 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, len = res->a1; fraglen = res->a2; - offset = buf->ep_mem_access[0].composite_off; + ep_mem_access = (void *)buf + + ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0); + offset = ep_mem_access->composite_off; /* * We can trust the SPMD to get this right, but let's at least * check that we end up with something that doesn't look _completely_ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 748d0a83a4bc..2444d596b703 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -357,6 +357,12 @@ struct ffa_mem_region { #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) +static inline u32 +ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) +{ + return COMPOSITE_OFFSET(0); +} + struct ffa_mem_ops_args { bool use_txbuf; u32 nattrs; -- cgit v1.2.3 From 113580530ee7dc61e668b641d657920734533b9f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 5 Oct 2023 15:45:09 +0100 Subject: firmware: arm_ffa: Update memory descriptor to support v1.1 format Update memory transaction descriptor structure to accommodate a couple of new entries in v1.1 which were previously marked reserved and MBZ (must be zero). It also removes the flexible array member ep_mem_access in the memory transaction descriptor structure as it need not be at a fixed offset. Also update ffa_mem_desc_offset() accessor to handle both old and new formats of memory transaction descriptors. The updated ffa_mem_region structure aligns with the new format in v1.1 and hence the driver/user must take care not to use members beyond and including ep_mem_offset when using the old format.
Link: https://lore.kernel.org/r/20231005-ffa_v1-1_notif-v4-16-cddd3237809c@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 11 ++++++++--- include/linux/arm_ffa.h | 33 ++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index c79067201487..6c5c7926b8ee 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -423,7 +423,7 @@ static u32 ffa_get_num_pages_sg(struct scatterlist *sg) return num_pages; } -static u8 ffa_memory_attributes_get(u32 func_id) +static u16 ffa_memory_attributes_get(u32 func_id) { /* * For the memory lend or donate operation, if the receiver is a PE or @@ -467,9 +467,14 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, ep_mem_access->reserved = 0; } mem_region->handle = 0; - mem_region->reserved_0 = 0; - mem_region->reserved_1 = 0; mem_region->ep_count = args->nattrs; + if (drv_info->version <= FFA_VERSION_1_0) { + mem_region->ep_mem_size = 0; + } else { + mem_region->ep_mem_size = sizeof(*ep_mem_access); + mem_region->ep_mem_offset = sizeof(*mem_region); + memset(mem_region->reserved, 0, 12); + } composite = buffer + composite_offset; composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg); diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 2444d596b703..1abedb5b2e48 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -6,6 +6,7 @@ #ifndef _LINUX_ARM_FFA_H #define _LINUX_ARM_FFA_H +#include #include #include #include @@ -298,8 +299,8 @@ struct ffa_mem_region { #define FFA_MEM_NON_SHAREABLE (0) #define FFA_MEM_OUTER_SHAREABLE (2) #define FFA_MEM_INNER_SHAREABLE (3) - u8 attributes; - u8 reserved_0; + /* Memory region attributes, upper byte MBZ pre v1.1 */ + u16 attributes; /* * Clear memory region contents after unmapping it from the sender and * before mapping it for any receiver. 
@@ -337,30 +338,40 @@ struct ffa_mem_region { * memory region. */ u64 tag; - u32 reserved_1; + /* Size of each endpoint memory access descriptor, MBZ pre v1.1 */ + u32 ep_mem_size; /* * The number of `ffa_mem_region_attributes` entries included in this * transaction. */ u32 ep_count; /* - * An array of endpoint memory access descriptors. - * Each one specifies a memory region offset, an endpoint and the - * attributes with which this memory region should be mapped in that - * endpoint's page table. + * 16-byte aligned offset from the base address of this descriptor + * to the first element of the endpoint memory access descriptor array + * Valid only from v1.1 */ - struct ffa_mem_region_attributes ep_mem_access[]; + u32 ep_mem_offset; + /* MBZ, valid only from v1.1 */ + u32 reserved[3]; }; -#define COMPOSITE_OFFSET(x) \ - (offsetof(struct ffa_mem_region, ep_mem_access[x])) #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) static inline u32 ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) { - return COMPOSITE_OFFSET(0); + u32 offset = count * sizeof(struct ffa_mem_region_attributes); + /* + * Earlier to v1.1, the endpoint memory descriptor array started at + * offset 32(i.e. offset of ep_mem_offset in the current structure) + */ + if (ffa_version <= FFA_VERSION_1_0) + offset += offsetof(struct ffa_mem_region, ep_mem_offset); + else + offset += sizeof(struct ffa_mem_region); + + return offset; } struct ffa_mem_ops_args { -- cgit v1.2.3 From 3e7807d5a7d770c59837026e9967fe99ad043174 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 4 Oct 2023 22:55:32 -0400 Subject: fscrypt: rename fscrypt_info => fscrypt_inode_info We are going to track per-extent information, so it'll be necessary to distinguish between inode infos and extent infos. Rename fscrypt_info to fscrypt_inode_info, adjusting any lines that now exceed 80 characters. 
Signed-off-by: Josef Bacik [ebiggers: rebased onto fscrypt tree, renamed fscrypt_get_info(), adjusted two comments, and fixed some lines over 80 characters] Link: https://lore.kernel.org/r/20231005025757.33521-1-ebiggers@kernel.org Reviewed-by: Neal Gompa Signed-off-by: Eric Biggers --- fs/crypto/bio.c | 2 +- fs/crypto/crypto.c | 21 +++++++++++---------- fs/crypto/fname.c | 6 +++--- fs/crypto/fscrypt_private.h | 42 ++++++++++++++++++++++-------------------- fs/crypto/hooks.c | 2 +- fs/crypto/inline_crypt.c | 13 +++++++------ fs/crypto/keyring.c | 4 ++-- fs/crypto/keysetup.c | 44 +++++++++++++++++++++++--------------------- fs/crypto/keysetup_v1.c | 15 +++++++++------ fs/crypto/policy.c | 10 +++++----- include/linux/fs.h | 4 ++-- include/linux/fscrypt.h | 10 ++++++---- 12 files changed, 92 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index c8cf77065272..0ad8c30b8fa5 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -111,7 +111,7 @@ out: int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { - const struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = inode->i_crypt_info; const unsigned int du_bits = ci->ci_data_unit_bits; const unsigned int du_size = 1U << du_bits; const unsigned int du_per_page_bits = PAGE_SHIFT - du_bits; diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 85e2f66dd663..328470d40dec 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -39,7 +39,7 @@ static mempool_t *fscrypt_bounce_page_pool = NULL; static struct workqueue_struct *fscrypt_read_workqueue; static DEFINE_MUTEX(fscrypt_init_mutex); -struct kmem_cache *fscrypt_info_cachep; +struct kmem_cache *fscrypt_inode_info_cachep; void fscrypt_enqueue_decrypt_work(struct work_struct *work) { @@ -85,7 +85,7 @@ EXPORT_SYMBOL(fscrypt_free_bounce_page); * simply contain the data unit index (e.g., IV_INO_LBLK_32). 
*/ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 index, - const struct fscrypt_info *ci) + const struct fscrypt_inode_info *ci) { u8 flags = fscrypt_policy_flags(&ci->ci_policy); @@ -105,7 +105,7 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 index, } /* Encrypt or decrypt a single "data unit" of file contents. */ -int fscrypt_crypt_data_unit(const struct fscrypt_info *ci, +int fscrypt_crypt_data_unit(const struct fscrypt_inode_info *ci, fscrypt_direction_t rw, u64 index, struct page *src_page, struct page *dest_page, unsigned int len, unsigned int offs, @@ -184,7 +184,7 @@ struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, { const struct inode *inode = page->mapping->host; - const struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = inode->i_crypt_info; const unsigned int du_bits = ci->ci_data_unit_bits; const unsigned int du_size = 1U << du_bits; struct page *ciphertext_page; @@ -267,7 +267,7 @@ int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs) { const struct inode *inode = folio->mapping->host; - const struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = inode->i_crypt_info; const unsigned int du_bits = ci->ci_data_unit_bits; const unsigned int du_size = 1U << du_bits; u64 index = ((u64)folio->index << (PAGE_SHIFT - du_bits)) + @@ -409,18 +409,19 @@ static int __init fscrypt_init(void) if (!fscrypt_read_workqueue) goto fail; - fscrypt_info_cachep = KMEM_CACHE(fscrypt_info, SLAB_RECLAIM_ACCOUNT); - if (!fscrypt_info_cachep) + fscrypt_inode_info_cachep = KMEM_CACHE(fscrypt_inode_info, + SLAB_RECLAIM_ACCOUNT); + if (!fscrypt_inode_info_cachep) goto fail_free_queue; err = fscrypt_init_keyring(); if (err) - goto fail_free_info; + goto fail_free_inode_info; return 0; -fail_free_info: - kmem_cache_destroy(fscrypt_info_cachep); +fail_free_inode_info: + kmem_cache_destroy(fscrypt_inode_info_cachep); fail_free_queue: 
destroy_workqueue(fscrypt_read_workqueue); fail: diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 6eae3f12ad50..7b3fc189593a 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -100,7 +100,7 @@ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - const struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_enc_key.tfm; union fscrypt_iv iv; struct scatterlist sg; @@ -157,7 +157,7 @@ static int fname_decrypt(const struct inode *inode, struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; - const struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_inode_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_enc_key.tfm; union fscrypt_iv iv; int res; @@ -568,7 +568,7 @@ EXPORT_SYMBOL_GPL(fscrypt_match_name); */ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) { - const struct fscrypt_info *ci = dir->i_crypt_info; + const struct fscrypt_inode_info *ci = dir->i_crypt_info; WARN_ON_ONCE(!ci->ci_dirhash_key_initialized); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 9c5e83baa3f1..2fb4ba435d27 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -210,18 +210,18 @@ struct fscrypt_prepared_key { }; /* - * fscrypt_info - the "encryption key" for an inode + * fscrypt_inode_info - the "encryption key" for an inode * * When an encrypted file's key is made available, an instance of this struct is * allocated and stored in ->i_crypt_info. Once created, it remains until the * inode is evicted. 
*/ -struct fscrypt_info { +struct fscrypt_inode_info { /* The key in a form prepared for actual encryption/decryption */ struct fscrypt_prepared_key ci_enc_key; - /* True if ci_enc_key should be freed when this fscrypt_info is freed */ + /* True if ci_enc_key should be freed when this struct is freed */ bool ci_owns_key; #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT @@ -294,9 +294,9 @@ typedef enum { } fscrypt_direction_t; /* crypto.c */ -extern struct kmem_cache *fscrypt_info_cachep; +extern struct kmem_cache *fscrypt_inode_info_cachep; int fscrypt_initialize(struct super_block *sb); -int fscrypt_crypt_data_unit(const struct fscrypt_info *ci, +int fscrypt_crypt_data_unit(const struct fscrypt_inode_info *ci, fscrypt_direction_t rw, u64 index, struct page *src_page, struct page *dest_page, unsigned int len, unsigned int offs, @@ -326,7 +326,7 @@ union fscrypt_iv { }; void fscrypt_generate_iv(union fscrypt_iv *iv, u64 index, - const struct fscrypt_info *ci); + const struct fscrypt_inode_info *ci); /* * Return the number of bits used by the maximum file data unit index that is @@ -374,17 +374,17 @@ void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf); /* inline_crypt.c */ #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT -int fscrypt_select_encryption_impl(struct fscrypt_info *ci); +int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci); static inline bool -fscrypt_using_inline_encryption(const struct fscrypt_info *ci) +fscrypt_using_inline_encryption(const struct fscrypt_inode_info *ci) { return ci->ci_inlinecrypt; } int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, - const struct fscrypt_info *ci); + const struct fscrypt_inode_info *ci); void fscrypt_destroy_inline_crypt_key(struct super_block *sb, struct fscrypt_prepared_key *prep_key); @@ -395,7 +395,7 @@ void fscrypt_destroy_inline_crypt_key(struct super_block *sb, */ static inline bool fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, - const struct 
fscrypt_info *ci) + const struct fscrypt_inode_info *ci) { /* * The two smp_load_acquire()'s here pair with the smp_store_release()'s @@ -412,13 +412,13 @@ fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ -static inline int fscrypt_select_encryption_impl(struct fscrypt_info *ci) +static inline int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci) { return 0; } static inline bool -fscrypt_using_inline_encryption(const struct fscrypt_info *ci) +fscrypt_using_inline_encryption(const struct fscrypt_inode_info *ci) { return false; } @@ -426,7 +426,7 @@ fscrypt_using_inline_encryption(const struct fscrypt_info *ci) static inline int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, - const struct fscrypt_info *ci) + const struct fscrypt_inode_info *ci) { WARN_ON_ONCE(1); return -EOPNOTSUPP; @@ -440,7 +440,7 @@ fscrypt_destroy_inline_crypt_key(struct super_block *sb, static inline bool fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, - const struct fscrypt_info *ci) + const struct fscrypt_inode_info *ci) { return smp_load_acquire(&prep_key->tfm) != NULL; } @@ -640,17 +640,18 @@ struct fscrypt_mode { extern struct fscrypt_mode fscrypt_modes[]; int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, - const u8 *raw_key, const struct fscrypt_info *ci); + const u8 *raw_key, const struct fscrypt_inode_info *ci); void fscrypt_destroy_prepared_key(struct super_block *sb, struct fscrypt_prepared_key *prep_key); -int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key); +int fscrypt_set_per_file_enc_key(struct fscrypt_inode_info *ci, + const u8 *raw_key); -int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, +int fscrypt_derive_dirhash_key(struct fscrypt_inode_info *ci, const struct fscrypt_master_key *mk); -void fscrypt_hash_inode_number(struct fscrypt_info *ci, +void fscrypt_hash_inode_number(struct fscrypt_inode_info *ci, 
const struct fscrypt_master_key *mk); int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported); @@ -685,10 +686,11 @@ static inline int fscrypt_require_key(struct inode *inode) void fscrypt_put_direct_key(struct fscrypt_direct_key *dk); -int fscrypt_setup_v1_file_key(struct fscrypt_info *ci, +int fscrypt_setup_v1_file_key(struct fscrypt_inode_info *ci, const u8 *raw_master_key); -int fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_info *ci); +int fscrypt_setup_v1_file_key_via_subscribed_keyrings( + struct fscrypt_inode_info *ci); /* policy.c */ diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 6238dbcadcad..85d2975b69b7 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -169,7 +169,7 @@ EXPORT_SYMBOL_GPL(__fscrypt_prepare_setattr); int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { - struct fscrypt_info *ci; + struct fscrypt_inode_info *ci; struct fscrypt_master_key *mk; int err; diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 8c6d37d6225a..b4002aea7cdb 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -39,7 +39,7 @@ static struct block_device **fscrypt_get_devices(struct super_block *sb, return devs; } -static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci) +static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_inode_info *ci) { const struct super_block *sb = ci->ci_inode->i_sb; unsigned int flags = fscrypt_policy_flags(&ci->ci_policy); @@ -89,7 +89,7 @@ static void fscrypt_log_blk_crypto_impl(struct fscrypt_mode *mode, } /* Enable inline encryption for this file if supported. 
*/ -int fscrypt_select_encryption_impl(struct fscrypt_info *ci) +int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci) { const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; @@ -151,7 +151,7 @@ out_free_devs: int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, - const struct fscrypt_info *ci) + const struct fscrypt_inode_info *ci) { const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; @@ -232,7 +232,8 @@ bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) } EXPORT_SYMBOL_GPL(__fscrypt_inode_uses_inline_crypto); -static void fscrypt_generate_dun(const struct fscrypt_info *ci, u64 lblk_num, +static void fscrypt_generate_dun(const struct fscrypt_inode_info *ci, + u64 lblk_num, u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]) { u64 index = lblk_num << ci->ci_data_units_per_block_bits; @@ -266,7 +267,7 @@ static void fscrypt_generate_dun(const struct fscrypt_info *ci, u64 lblk_num, void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, u64 first_lblk, gfp_t gfp_mask) { - const struct fscrypt_info *ci; + const struct fscrypt_inode_info *ci; u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; if (!fscrypt_inode_uses_inline_crypto(inode)) @@ -457,7 +458,7 @@ EXPORT_SYMBOL_GPL(fscrypt_dio_supported); */ u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks) { - const struct fscrypt_info *ci; + const struct fscrypt_inode_info *ci; u32 dun; if (!fscrypt_inode_uses_inline_crypto(inode)) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 7cbb1fd872ac..a51fa6a33de1 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -867,7 +867,7 @@ static void shrink_dcache_inode(struct inode *inode) static void evict_dentries_for_decrypted_inodes(struct fscrypt_master_key *mk) { - struct fscrypt_info *ci; + struct fscrypt_inode_info *ci; struct inode *inode; struct inode *toput_inode = NULL; @@ -917,7 +917,7 @@ static int 
check_for_busy_inodes(struct super_block *sb, /* select an example file to show for debugging purposes */ struct inode *inode = list_first_entry(&mk->mk_decrypted_inodes, - struct fscrypt_info, + struct fscrypt_inode_info, ci_master_key_link)->ci_inode; ino = inode->i_ino; } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 608599f8aa57..094d1b7a1ae6 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -148,7 +148,7 @@ err_free_tfm: * and IV generation method (@ci->ci_policy.flags). */ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, - const u8 *raw_key, const struct fscrypt_info *ci) + const u8 *raw_key, const struct fscrypt_inode_info *ci) { struct crypto_skcipher *tfm; @@ -178,13 +178,14 @@ void fscrypt_destroy_prepared_key(struct super_block *sb, } /* Given a per-file encryption key, set up the file's crypto transform object */ -int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key) +int fscrypt_set_per_file_enc_key(struct fscrypt_inode_info *ci, + const u8 *raw_key) { ci->ci_owns_key = true; return fscrypt_prepare_key(&ci->ci_enc_key, raw_key, ci); } -static int setup_per_mode_enc_key(struct fscrypt_info *ci, +static int setup_per_mode_enc_key(struct fscrypt_inode_info *ci, struct fscrypt_master_key *mk, struct fscrypt_prepared_key *keys, u8 hkdf_context, bool include_fs_uuid) @@ -265,7 +266,7 @@ static int fscrypt_derive_siphash_key(const struct fscrypt_master_key *mk, return 0; } -int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, +int fscrypt_derive_dirhash_key(struct fscrypt_inode_info *ci, const struct fscrypt_master_key *mk) { int err; @@ -279,7 +280,7 @@ int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, return 0; } -void fscrypt_hash_inode_number(struct fscrypt_info *ci, +void fscrypt_hash_inode_number(struct fscrypt_inode_info *ci, const struct fscrypt_master_key *mk) { WARN_ON_ONCE(ci->ci_inode->i_ino == 0); @@ -289,7 +290,7 @@ void fscrypt_hash_inode_number(struct fscrypt_info 
*ci, &mk->mk_ino_hash_key); } -static int fscrypt_setup_iv_ino_lblk_32_key(struct fscrypt_info *ci, +static int fscrypt_setup_iv_ino_lblk_32_key(struct fscrypt_inode_info *ci, struct fscrypt_master_key *mk) { int err; @@ -329,7 +330,7 @@ unlock: return 0; } -static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, +static int fscrypt_setup_v2_file_key(struct fscrypt_inode_info *ci, struct fscrypt_master_key *mk, bool need_dirhash_key) { @@ -404,7 +405,7 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, * still allow 512-bit master keys if the user chooses to use them, though.) */ static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk, - const struct fscrypt_info *ci) + const struct fscrypt_inode_info *ci) { unsigned int min_keysize; @@ -430,11 +431,12 @@ static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk, * * If the master key is found in the filesystem-level keyring, then it is * returned in *mk_ret with its semaphore read-locked. This is needed to ensure - * that only one task links the fscrypt_info into ->mk_decrypted_inodes (as - * multiple tasks may race to create an fscrypt_info for the same inode), and to - * synchronize the master key being removed with a new inode starting to use it. + * that only one task links the fscrypt_inode_info into ->mk_decrypted_inodes + * (as multiple tasks may race to create an fscrypt_inode_info for the same + * inode), and to synchronize the master key being removed with a new inode + * starting to use it. 
*/ -static int setup_file_encryption_key(struct fscrypt_info *ci, +static int setup_file_encryption_key(struct fscrypt_inode_info *ci, bool need_dirhash_key, struct fscrypt_master_key **mk_ret) { @@ -519,7 +521,7 @@ out_release_key: return err; } -static void put_crypt_info(struct fscrypt_info *ci) +static void put_crypt_info(struct fscrypt_inode_info *ci) { struct fscrypt_master_key *mk; @@ -546,7 +548,7 @@ static void put_crypt_info(struct fscrypt_info *ci) fscrypt_put_master_key_activeref(ci->ci_inode->i_sb, mk); } memzero_explicit(ci, sizeof(*ci)); - kmem_cache_free(fscrypt_info_cachep, ci); + kmem_cache_free(fscrypt_inode_info_cachep, ci); } static int @@ -555,7 +557,7 @@ fscrypt_setup_encryption_info(struct inode *inode, const u8 nonce[FSCRYPT_FILE_NONCE_SIZE], bool need_dirhash_key) { - struct fscrypt_info *crypt_info; + struct fscrypt_inode_info *crypt_info; struct fscrypt_mode *mode; struct fscrypt_master_key *mk = NULL; int res; @@ -564,7 +566,7 @@ fscrypt_setup_encryption_info(struct inode *inode, if (res) return res; - crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_KERNEL); + crypt_info = kmem_cache_zalloc(fscrypt_inode_info_cachep, GFP_KERNEL); if (!crypt_info) return -ENOMEM; @@ -592,8 +594,8 @@ fscrypt_setup_encryption_info(struct inode *inode, /* * For existing inodes, multiple tasks may race to set ->i_crypt_info. * So use cmpxchg_release(). This pairs with the smp_load_acquire() in - * fscrypt_get_info(). I.e., here we publish ->i_crypt_info with a - * RELEASE barrier so that other tasks can ACQUIRE it. + * fscrypt_get_inode_info(). I.e., here we publish ->i_crypt_info with + * a RELEASE barrier so that other tasks can ACQUIRE it. */ if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL) { /* @@ -740,8 +742,8 @@ EXPORT_SYMBOL_GPL(fscrypt_prepare_new_inode); * fscrypt_put_encryption_info() - free most of an inode's fscrypt data * @inode: an inode being evicted * - * Free the inode's fscrypt_info. 
Filesystems must call this when the inode is - * being evicted. An RCU grace period need not have elapsed yet. + * Free the inode's fscrypt_inode_info. Filesystems must call this when the + * inode is being evicted. An RCU grace period need not have elapsed yet. */ void fscrypt_put_encryption_info(struct inode *inode) { @@ -778,7 +780,7 @@ EXPORT_SYMBOL(fscrypt_free_inode); */ int fscrypt_drop_inode(struct inode *inode) { - const struct fscrypt_info *ci = fscrypt_get_info(inode); + const struct fscrypt_inode_info *ci = fscrypt_get_inode_info(inode); /* * If ci is NULL, then the inode doesn't have an encryption key set up diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 86b48a2b47d1..a10710bc8123 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -178,7 +178,8 @@ void fscrypt_put_direct_key(struct fscrypt_direct_key *dk) */ static struct fscrypt_direct_key * find_or_insert_direct_key(struct fscrypt_direct_key *to_insert, - const u8 *raw_key, const struct fscrypt_info *ci) + const u8 *raw_key, + const struct fscrypt_inode_info *ci) { unsigned long hash_key; struct fscrypt_direct_key *dk; @@ -218,7 +219,7 @@ find_or_insert_direct_key(struct fscrypt_direct_key *to_insert, /* Prepare to encrypt directly using the master key in the given mode */ static struct fscrypt_direct_key * -fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) +fscrypt_get_direct_key(const struct fscrypt_inode_info *ci, const u8 *raw_key) { struct fscrypt_direct_key *dk; int err; @@ -250,7 +251,7 @@ err_free_dk: } /* v1 policy, DIRECT_KEY: use the master key directly */ -static int setup_v1_file_key_direct(struct fscrypt_info *ci, +static int setup_v1_file_key_direct(struct fscrypt_inode_info *ci, const u8 *raw_master_key) { struct fscrypt_direct_key *dk; @@ -264,7 +265,7 @@ static int setup_v1_file_key_direct(struct fscrypt_info *ci, } /* v1 policy, !DIRECT_KEY: derive the file's encryption key */ -static int 
setup_v1_file_key_derived(struct fscrypt_info *ci, +static int setup_v1_file_key_derived(struct fscrypt_inode_info *ci, const u8 *raw_master_key) { u8 *derived_key; @@ -289,7 +290,8 @@ out: return err; } -int fscrypt_setup_v1_file_key(struct fscrypt_info *ci, const u8 *raw_master_key) +int fscrypt_setup_v1_file_key(struct fscrypt_inode_info *ci, + const u8 *raw_master_key) { if (ci->ci_policy.v1.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) return setup_v1_file_key_direct(ci, raw_master_key); @@ -297,7 +299,8 @@ int fscrypt_setup_v1_file_key(struct fscrypt_info *ci, const u8 *raw_master_key) return setup_v1_file_key_derived(ci, raw_master_key); } -int fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_info *ci) +int +fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_inode_info *ci) { const struct super_block *sb = ci->ci_inode->i_sb; struct key *key; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 2fb3f6a1258e..701259991277 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -432,11 +432,11 @@ int fscrypt_policy_from_context(union fscrypt_policy *policy_u, /* Retrieve an inode's encryption policy */ static int fscrypt_get_policy(struct inode *inode, union fscrypt_policy *policy) { - const struct fscrypt_info *ci; + const struct fscrypt_inode_info *ci; union fscrypt_context ctx; int ret; - ci = fscrypt_get_info(inode); + ci = fscrypt_get_inode_info(inode); if (ci) { /* key available, use the cached policy */ *policy = ci->ci_policy; @@ -674,7 +674,7 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) /* * Both parent and child are encrypted, so verify they use the same - * encryption policy. Compare the fscrypt_info structs if the keys are + * encryption policy. Compare the cached policies if the keys are * available, otherwise retrieve and compare the fscrypt_contexts. 
* * Note that the fscrypt_context retrieval will be required frequently @@ -744,7 +744,7 @@ const union fscrypt_policy *fscrypt_policy_to_inherit(struct inode *dir) */ int fscrypt_context_for_new_inode(void *ctx, struct inode *inode) { - struct fscrypt_info *ci = inode->i_crypt_info; + struct fscrypt_inode_info *ci = inode->i_crypt_info; BUILD_BUG_ON(sizeof(union fscrypt_context) != FSCRYPT_SET_CONTEXT_MAX_SIZE); @@ -769,7 +769,7 @@ EXPORT_SYMBOL_GPL(fscrypt_context_for_new_inode); */ int fscrypt_set_context(struct inode *inode, void *fs_data) { - struct fscrypt_info *ci = inode->i_crypt_info; + struct fscrypt_inode_info *ci = inode->i_crypt_info; union fscrypt_context ctx; int ctxsize; diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ff..a3df96736473 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -67,7 +67,7 @@ struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; -struct fscrypt_info; +struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; @@ -738,7 +738,7 @@ struct inode { #endif #ifdef CONFIG_FS_ENCRYPTION - struct fscrypt_info *i_crypt_info; + struct fscrypt_inode_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index b559e6f77707..12f9e455d569 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -31,7 +31,7 @@ #define FSCRYPT_CONTENTS_ALIGNMENT 16 union fscrypt_policy; -struct fscrypt_info; +struct fscrypt_inode_info; struct fs_parameter; struct seq_file; @@ -192,7 +192,8 @@ struct fscrypt_operations { unsigned int *num_devs); }; -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). 
@@ -404,7 +405,8 @@ static inline void fscrypt_set_ops(struct super_block *sb, } #else /* !CONFIG_FS_ENCRYPTION */ -static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode) +static inline struct fscrypt_inode_info * +fscrypt_get_inode_info(const struct inode *inode) { return NULL; } @@ -882,7 +884,7 @@ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) */ static inline bool fscrypt_has_encryption_key(const struct inode *inode) { - return fscrypt_get_info(inode) != NULL; + return fscrypt_get_inode_info(inode) != NULL; } /** -- cgit v1.2.3 From 4a530cb932af31b0c919a109bc107dd186653381 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:50:53 -0700 Subject: hwmon: Annotate struct gsc_hwmon_platform_data with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct gsc_hwmon_platform_data. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Tim Harvey Reviewed-by: "Gustavo A. R. 
Silva" Link: https://lore.kernel.org/r/20230922175053.work.564-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/platform_data/gsc_hwmon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h index f2781aa7eff8..70e8a6bec0f6 100644 --- a/include/linux/platform_data/gsc_hwmon.h +++ b/include/linux/platform_data/gsc_hwmon.h @@ -40,6 +40,6 @@ struct gsc_hwmon_platform_data { unsigned int resolution; unsigned int vreference; unsigned int fan_base; - struct gsc_hwmon_channel channels[]; + struct gsc_hwmon_channel channels[] __counted_by(nchannels); }; #endif -- cgit v1.2.3 From a48e1f656b3c9b8192b6ca6fc92ef4daa30535fb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:21 -0700 Subject: KVM: Annotate struct kvm_irq_routing_table with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct kvm_irq_routing_table. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Reviewed-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/20230922175121.work.660-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fb6c6109fdca..4944136efaa2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -664,7 +664,7 @@ struct kvm_irq_routing_table { * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. 
*/ - struct hlist_head map[]; + struct hlist_head map[] __counted_by(nr_rt_entries); }; #endif -- cgit v1.2.3 From 313ebe47d75558511aa1237b6e35c663b5c0ec6f Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 20 Sep 2023 14:36:09 +0200 Subject: string.h: add array-wrappers for (v)memdup_user() Currently, user array duplications are sometimes done without an overflow check. Sometimes the checks are done manually; sometimes the array size is calculated with array_size() and sometimes by calculating n * size directly in code. Introduce wrappers for arrays for memdup_user() and vmemdup_user() to provide a standardized and safe way for duplicating user arrays. This is both for new code as well as replacing usage of (v)memdup_user() in existing code that uses, e.g., n * size to calculate array sizes. Suggested-by: David Airlie Signed-off-by: Philipp Stanner Reviewed-by: Andy Shevchenko Reviewed-by: Kees Cook Reviewed-by: Zack Rusin Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230920123612.16914-3-pstanner@redhat.com --- include/linux/string.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index dbfc66400050..debf4ef1098f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -5,7 +5,9 @@ #include /* for inline */ #include /* for size_t */ #include /* for NULL */ +#include /* for ERR_PTR() */ #include /* for E2BIG */ +#include /* for check_mul_overflow() */ #include #include @@ -14,6 +16,44 @@ extern void *memdup_user(const void __user *, size_t); extern void *vmemdup_user(const void __user *, size_t); extern void *memdup_user_nul(const void __user *, size_t); +/** + * memdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. 
Result is physically + * contiguous, to be freed by kfree(). + */ +static inline void *memdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return memdup_user(src, nbytes); +} + +/** + * vmemdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. Result may be not + * physically contiguous. Use kvfree() to free. + */ +static inline void *vmemdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return vmemdup_user(src, nbytes); +} + /* * Include machine specific inline routines */ -- cgit v1.2.3 From 6d0d419914286a1b255530812a38d992c6c4e608 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:06 +0100 Subject: iov_iter, net: Move csum_and_copy_to/from_iter() to net/ Move csum_and_copy_to/from_iter() to net code now that the iteration framework can be #included. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-10-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: "David S. 
Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: netdev@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/skbuff.h | 25 ++++++++++++++ include/linux/uio.h | 18 ---------- lib/iov_iter.c | 89 -------------------------------------------------- net/core/datagram.c | 50 +++++++++++++++++++++++++++- net/core/skbuff.c | 33 +++++++++++++++++++ 5 files changed, 107 insertions(+), 108 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4174c4b82d13..d0656cc11c16 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3679,6 +3679,31 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } +static inline __wsum csum_and_memcpy(void *to, const void *from, size_t len, + __wsum sum, size_t off) +{ + __wsum next = csum_partial_copy_nocheck(from, to, len); + return csum_block_add(sum, next, off); +} + +struct csum_state { + __wsum csum; + size_t off; +}; + +size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); + +static __always_inline __must_check +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, + __wsum *csum, struct iov_iter *i) +{ + size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; +} + static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) { diff --git a/include/linux/uio.h b/include/linux/uio.h index 65d9143f83c8..0a5426c97e02 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -338,24 +338,6 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) return npages; } -struct csum_state { - __wsum csum; - size_t off; -}; - -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct 
iov_iter *i); -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); - -static __always_inline __must_check -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) -{ - size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); - if (likely(copied == bytes)) - return true; - iov_iter_revert(i, copied); - return false; -} size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 0a7160aa4e31..3f913616ce88 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -179,13 +178,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_init); -static __wsum csum_and_memcpy(void *to, const void *from, size_t len, - __wsum sum, size_t off) -{ - __wsum next = csum_partial_copy_nocheck(from, to, len); - return csum_block_add(sum, next, off); -} - size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(i->data_source)) @@ -1097,87 +1089,6 @@ ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, } EXPORT_SYMBOL(iov_iter_get_pages_alloc2); -static __always_inline -size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress, - size_t len, void *to, void *priv2) -{ - __wsum next, *csum = priv2; - - next = csum_and_copy_from_user(iter_from, to + progress, len); - *csum = csum_block_add(*csum, next, progress); - return next ? 
0 : len; -} - -static __always_inline -size_t memcpy_from_iter_csum(void *iter_from, size_t progress, - size_t len, void *to, void *priv2) -{ - __wsum *csum = priv2; - - *csum = csum_and_memcpy(to + progress, iter_from, len, *csum, progress); - return 0; -} - -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, - struct iov_iter *i) -{ - if (WARN_ON_ONCE(!i->data_source)) - return 0; - return iterate_and_advance2(i, bytes, addr, csum, - copy_from_user_iter_csum, - memcpy_from_iter_csum); -} -EXPORT_SYMBOL(csum_and_copy_from_iter); - -static __always_inline -size_t copy_to_user_iter_csum(void __user *iter_to, size_t progress, - size_t len, void *from, void *priv2) -{ - __wsum next, *csum = priv2; - - next = csum_and_copy_to_user(from + progress, iter_to, len); - *csum = csum_block_add(*csum, next, progress); - return next ? 0 : len; -} - -static __always_inline -size_t memcpy_to_iter_csum(void *iter_to, size_t progress, - size_t len, void *from, void *priv2) -{ - __wsum *csum = priv2; - - *csum = csum_and_memcpy(iter_to, from + progress, len, *csum, progress); - return 0; -} - -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, - struct iov_iter *i) -{ - struct csum_state *csstate = _csstate; - __wsum sum; - - if (WARN_ON_ONCE(i->data_source)) - return 0; - if (unlikely(iov_iter_is_discard(i))) { - // can't use csum_memcpy() for that one - data is not copied - csstate->csum = csum_block_add(csstate->csum, - csum_partial(addr, bytes, 0), - csstate->off); - csstate->off += bytes; - return bytes; - } - - sum = csum_shift(csstate->csum, csstate->off); - - bytes = iterate_and_advance2(i, bytes, (void *)addr, &sum, - copy_to_user_iter_csum, - memcpy_to_iter_csum); - csstate->csum = csum_shift(sum, csstate->off); - csstate->off += bytes; - return bytes; -} -EXPORT_SYMBOL(csum_and_copy_to_iter); - size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i) { diff --git a/net/core/datagram.c 
b/net/core/datagram.c index 176eb5834746..37c89d0933b7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include @@ -716,6 +716,54 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) } EXPORT_SYMBOL(zerocopy_sg_from_iter); +static __always_inline +size_t copy_to_user_iter_csum(void __user *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + __wsum next, *csum = priv2; + + next = csum_and_copy_to_user(from + progress, iter_to, len); + *csum = csum_block_add(*csum, next, progress); + return next ? 0 : len; +} + +static __always_inline +size_t memcpy_to_iter_csum(void *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + __wsum *csum = priv2; + + *csum = csum_and_memcpy(iter_to, from + progress, len, *csum, progress); + return 0; +} + +static size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, + struct iov_iter *i) +{ + struct csum_state *csstate = _csstate; + __wsum sum; + + if (WARN_ON_ONCE(i->data_source)) + return 0; + if (unlikely(iov_iter_is_discard(i))) { + // can't use csum_memcpy() for that one - data is not copied + csstate->csum = csum_block_add(csstate->csum, + csum_partial(addr, bytes, 0), + csstate->off); + csstate->off += bytes; + return bytes; + } + + sum = csum_shift(csstate->csum, csstate->off); + + bytes = iterate_and_advance2(i, bytes, (void *)addr, &sum, + copy_to_user_iter_csum, + memcpy_to_iter_csum); + csstate->csum = csum_shift(sum, csstate->off); + csstate->off += bytes; + return bytes; +} + /** * skb_copy_and_csum_datagram - Copy datagram to an iovec iterator * and update a checksum. 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4eaf7ed0d1f4..5dbdfce2d05f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -6931,3 +6932,35 @@ out: return spliced ?: ret; } EXPORT_SYMBOL(skb_splice_from_iter); + +static __always_inline +size_t memcpy_from_iter_csum(void *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + __wsum *csum = priv2; + + *csum = csum_and_memcpy(to + progress, iter_from, len, *csum, progress); + return 0; +} + +static __always_inline +size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + __wsum next, *csum = priv2; + + next = csum_and_copy_from_user(iter_from, to + progress, len); + *csum = csum_block_add(*csum, next, progress); + return next ? 0 : len; +} + +size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, + struct iov_iter *i) +{ + if (WARN_ON_ONCE(!i->data_source)) + return 0; + return iterate_and_advance2(i, bytes, addr, csum, + copy_from_user_iter_csum, + memcpy_from_iter_csum); +} +EXPORT_SYMBOL(csum_and_copy_from_iter); -- cgit v1.2.3 From dc32bff195b45e8571c442954beee259e9500dac Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:07 +0100 Subject: iov_iter, net: Fold in csum_and_memcpy() Fold csum_and_memcpy() in to its callers. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-11-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: "David S. 
Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: netdev@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/skbuff.h | 7 ------- net/core/datagram.c | 3 ++- net/core/skbuff.c | 3 ++- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d0656cc11c16..c81ef5d76953 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3679,13 +3679,6 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } -static inline __wsum csum_and_memcpy(void *to, const void *from, size_t len, - __wsum sum, size_t off) -{ - __wsum next = csum_partial_copy_nocheck(from, to, len); - return csum_block_add(sum, next, off); -} - struct csum_state { __wsum csum; size_t off; diff --git a/net/core/datagram.c b/net/core/datagram.c index 37c89d0933b7..452620dd41e4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -732,8 +732,9 @@ size_t memcpy_to_iter_csum(void *iter_to, size_t progress, size_t len, void *from, void *priv2) { __wsum *csum = priv2; + __wsum next = csum_partial_copy_nocheck(from, iter_to, len); - *csum = csum_and_memcpy(iter_to, from + progress, len, *csum, progress); + *csum = csum_block_add(*csum, next, progress); return 0; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5dbdfce2d05f..3efed86321db 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6938,8 +6938,9 @@ size_t memcpy_from_iter_csum(void *iter_from, size_t progress, size_t len, void *to, void *priv2) { __wsum *csum = priv2; + __wsum next = csum_partial_copy_nocheck(iter_from, to + progress, len); - *csum = csum_and_memcpy(to + progress, iter_from, len, *csum, progress); + *csum = csum_block_add(*csum, next, progress); return 0; } -- cgit v1.2.3 From 7c6f353e8a73cbb8919a20e0912021a9f9d24170 Mon Sep 17 
00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:08 +0100 Subject: iov_iter, net: Merge csum_and_copy_from_iter{,_full}() together Move csum_and_copy_from_iter_full() out of line and then merge csum_and_copy_from_iter() into its only caller. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-12-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: netdev@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/skbuff.h | 19 ++----------------- net/core/datagram.c | 5 +++++ net/core/skbuff.c | 20 +++++++++++++------- 3 files changed, 20 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c81ef5d76953..be402f55f6d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3679,23 +3679,8 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l return __skb_put_padto(skb, len, true); } -struct csum_state { - __wsum csum; - size_t off; -}; - -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); - -static __always_inline __must_check -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) -{ - size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); - if (likely(copied == bytes)) - return true; - iov_iter_revert(i, copied); - return false; -} +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) + __must_check; static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) diff --git a/net/core/datagram.c b/net/core/datagram.c index 452620dd41e4..722311eeee18 100644 --- a/net/core/datagram.c +++ 
b/net/core/datagram.c @@ -738,6 +738,11 @@ size_t memcpy_to_iter_csum(void *iter_to, size_t progress, return 0; } +struct csum_state { + __wsum csum; + size_t off; +}; + static size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, struct iov_iter *i) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3efed86321db..2bfa6a7ba244 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6955,13 +6955,19 @@ size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress, return next ? 0 : len; } -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, - struct iov_iter *i) +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, + __wsum *csum, struct iov_iter *i) { + size_t copied; + if (WARN_ON_ONCE(!i->data_source)) - return 0; - return iterate_and_advance2(i, bytes, addr, csum, - copy_from_user_iter_csum, - memcpy_from_iter_csum); + return false; + copied = iterate_and_advance2(i, bytes, addr, csum, + copy_from_user_iter_csum, + memcpy_from_iter_csum); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; } -EXPORT_SYMBOL(csum_and_copy_from_iter); +EXPORT_SYMBOL(csum_and_copy_from_iter_full); -- cgit v1.2.3 From b5f0e20f444cd150121e0ce912ebd3f2dabd12bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Sep 2023 13:03:09 +0100 Subject: iov_iter, net: Move hash_and_copy_to_iter() to net/ Move hash_and_copy_to_iter() to be with its only caller in networking code. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20230925120309.1731676-13-dhowells@redhat.com cc: Alexander Viro cc: Jens Axboe cc: Christoph Hellwig cc: Christian Brauner cc: Matthew Wilcox cc: Linus Torvalds cc: David Laight cc: "David S. 
Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: netdev@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/uio.h | 3 --- lib/iov_iter.c | 20 -------------------- net/core/datagram.c | 19 +++++++++++++++++++ 3 files changed, 19 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 0a5426c97e02..b6214cbf2a43 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -338,9 +338,6 @@ iov_iter_npages_cap(struct iov_iter *i, int maxpages, size_t max_bytes) return npages; } -size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, - struct iov_iter *i); - struct iovec *iovec_from_user(const struct iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_iov, bool compat); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 3f913616ce88..de7d11cf4c63 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-only -#include #include #include #include @@ -1089,25 +1088,6 @@ ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, } EXPORT_SYMBOL(iov_iter_get_pages_alloc2); -size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, - struct iov_iter *i) -{ -#ifdef CONFIG_CRYPTO_HASH - struct ahash_request *hash = hashp; - struct scatterlist sg; - size_t copied; - - copied = copy_to_iter(addr, bytes, i); - sg_init_one(&sg, addr, copied); - ahash_request_set_crypt(hash, &sg, NULL, copied); - crypto_ahash_update(hash); - return copied; -#else - return 0; -#endif -} -EXPORT_SYMBOL(hash_and_copy_to_iter); - static int iov_npages(const struct iov_iter *i, int maxpages) { size_t skip = i->iov_offset, size = i->count; diff --git a/net/core/datagram.c b/net/core/datagram.c index 722311eeee18..103d46fa0eeb 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -61,6 +61,7 @@ #include 
#include #include +#include /* * Is a socket 'connection oriented' ? @@ -489,6 +490,24 @@ short_copy: return 0; } +static size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, + struct iov_iter *i) +{ +#ifdef CONFIG_CRYPTO_HASH + struct ahash_request *hash = hashp; + struct scatterlist sg; + size_t copied; + + copied = copy_to_iter(addr, bytes, i); + sg_init_one(&sg, addr, copied); + ahash_request_set_crypt(hash, &sg, NULL, copied); + crypto_ahash_update(hash); + return copied; +#else + return 0; +#endif +} + /** * skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator * and update a hash. -- cgit v1.2.3 From 2b76129c5ae710423cfb55806803341af6a403a7 Mon Sep 17 00:00:00 2001 From: David Meriin Date: Mon, 24 Jul 2023 23:30:44 +0300 Subject: accel/habanalabs: move cpucp interface to linux/habanalabs The CPUCP interface is moved to a shared folder outside of accel as a pre-requisite to upstream the NIC drivers that will also include this file. Signed-off-by: David Meriin Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- MAINTAINERS | 1 + drivers/accel/habanalabs/common/firmware_if.c | 2 +- drivers/accel/habanalabs/common/habanalabs.h | 2 +- drivers/accel/habanalabs/gaudi/gaudiP.h | 2 +- drivers/accel/habanalabs/gaudi2/gaudi2P.h | 2 +- drivers/accel/habanalabs/goya/goyaP.h | 2 +- drivers/accel/habanalabs/include/common/cpucp_if.h | 1402 ------------------- .../accel/habanalabs/include/common/hl_boot_if.h | 790 ----------- include/linux/habanalabs/cpucp_if.h | 1407 ++++++++++++++++++++ include/linux/habanalabs/hl_boot_if.h | 790 +++++++++++ 10 files changed, 2203 insertions(+), 2197 deletions(-) delete mode 100644 drivers/accel/habanalabs/include/common/cpucp_if.h delete mode 100644 drivers/accel/habanalabs/include/common/hl_boot_if.h create mode 100644 include/linux/habanalabs/cpucp_if.h create mode 100644 include/linux/habanalabs/hl_boot_if.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 
e05506ea8917..c9ff335d2c8c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9071,6 +9071,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git F: Documentation/ABI/testing/debugfs-driver-habanalabs F: Documentation/ABI/testing/sysfs-driver-habanalabs F: drivers/accel/habanalabs/ +F: include/linux/habanalabs/ F: include/trace/events/habanalabs.h F: include/uapi/drm/habanalabs_accel.h diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index 2bc775d29854..2a6dfea3d27d 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -6,7 +6,7 @@ */ #include "habanalabs.h" -#include "../include/common/hl_boot_if.h" +#include #include #include diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index efb046370f2e..8b5fd2b92676 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -8,7 +8,7 @@ #ifndef HABANALABSP_H_ #define HABANALABSP_H_ -#include "../include/common/cpucp_if.h" +#include #include "../include/common/qman_if.h" #include "../include/hw_ip/mmu/mmu_general.h" #include diff --git a/drivers/accel/habanalabs/gaudi/gaudiP.h b/drivers/accel/habanalabs/gaudi/gaudiP.h index b8fa724be5a1..831be53bb9d7 100644 --- a/drivers/accel/habanalabs/gaudi/gaudiP.h +++ b/drivers/accel/habanalabs/gaudi/gaudiP.h @@ -10,7 +10,7 @@ #include #include "../common/habanalabs.h" -#include "../include/common/hl_boot_if.h" +#include #include "../include/gaudi/gaudi_packets.h" #include "../include/gaudi/gaudi.h" #include "../include/gaudi/gaudi_async_events.h" diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h index 5f3ce086928e..4535aa5ab561 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h @@ -10,7 +10,7 @@ #include #include "../common/habanalabs.h" 
-#include "../include/common/hl_boot_if.h" +#include #include "../include/gaudi2/gaudi2.h" #include "../include/gaudi2/gaudi2_packets.h" #include "../include/gaudi2/gaudi2_fw_if.h" diff --git a/drivers/accel/habanalabs/goya/goyaP.h b/drivers/accel/habanalabs/goya/goyaP.h index 5df3d30b91fd..194c2ae157cd 100644 --- a/drivers/accel/habanalabs/goya/goyaP.h +++ b/drivers/accel/habanalabs/goya/goyaP.h @@ -9,8 +9,8 @@ #define GOYAP_H_ #include +#include #include "../common/habanalabs.h" -#include "../include/common/hl_boot_if.h" #include "../include/goya/goya_packets.h" #include "../include/goya/goya.h" #include "../include/goya/goya_async_events.h" diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/drivers/accel/habanalabs/include/common/cpucp_if.h deleted file mode 100644 index ef7d32224066..000000000000 --- a/drivers/accel/habanalabs/include/common/cpucp_if.h +++ /dev/null @@ -1,1402 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2020-2022 HabanaLabs, Ltd. - * All Rights Reserved. 
- * - */ - -#ifndef CPUCP_IF_H -#define CPUCP_IF_H - -#include -#include - -#include "hl_boot_if.h" - -#define NUM_HBM_PSEUDO_CH 2 -#define NUM_HBM_CH_PER_DEV 8 -#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT 0 -#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK 0x00000001 -#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_SHIFT 1 -#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK 0x00000002 -#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_SHIFT 2 -#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK 0x00000004 -#define CPUCP_PKT_HBM_ECC_INFO_DERR_SHIFT 3 -#define CPUCP_PKT_HBM_ECC_INFO_DERR_MASK 0x00000008 -#define CPUCP_PKT_HBM_ECC_INFO_SERR_SHIFT 4 -#define CPUCP_PKT_HBM_ECC_INFO_SERR_MASK 0x00000010 -#define CPUCP_PKT_HBM_ECC_INFO_TYPE_SHIFT 5 -#define CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK 0x00000020 -#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 -#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 - -#define PLL_MAP_MAX_BITS 128 -#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) - -/* - * info of the pkt queue pointers in the first async occurrence - */ -struct cpucp_pkt_sync_err { - __le32 pi; - __le32 ci; -}; - -struct hl_eq_hbm_ecc_data { - /* SERR counter */ - __le32 sec_cnt; - /* DERR counter */ - __le32 dec_cnt; - /* Supplemental Information according to the mask bits */ - __le32 hbm_ecc_info; - /* Address in hbm where the ecc happened */ - __le32 first_addr; - /* SERR continuous address counter */ - __le32 sec_cont_cnt; - __le32 pad; -}; - -/* - * EVENT QUEUE - */ - -struct hl_eq_header { - __le32 reserved; - __le32 ctl; -}; - -struct hl_eq_ecc_data { - __le64 ecc_address; - __le64 ecc_syndrom; - __u8 memory_wrapper_idx; - __u8 is_critical; - __le16 block_id; - __u8 pad[4]; -}; - -enum hl_sm_sei_cause { - SM_SEI_SO_OVERFLOW, - SM_SEI_LBW_4B_UNALIGNED, - SM_SEI_AXI_RESPONSE_ERR -}; - -struct hl_eq_sm_sei_data { - __le32 sei_log; - /* enum hl_sm_sei_cause */ - __u8 sei_cause; - __u8 pad[3]; -}; - -enum hl_fw_alive_severity { - FW_ALIVE_SEVERITY_MINOR, - FW_ALIVE_SEVERITY_CRITICAL -}; - -struct hl_eq_fw_alive { - 
__le64 uptime_seconds; - __le32 process_id; - __le32 thread_id; - /* enum hl_fw_alive_severity */ - __u8 severity; - __u8 pad[7]; -}; - -struct hl_eq_intr_cause { - __le64 intr_cause_data; -}; - -struct hl_eq_pcie_drain_ind_data { - struct hl_eq_intr_cause intr_cause; - __le64 drain_wr_addr_lbw; - __le64 drain_rd_addr_lbw; - __le64 drain_wr_addr_hbw; - __le64 drain_rd_addr_hbw; -}; - -struct hl_eq_razwi_lbw_info_regs { - __le32 rr_aw_razwi_reg; - __le32 rr_aw_razwi_id_reg; - __le32 rr_ar_razwi_reg; - __le32 rr_ar_razwi_id_reg; -}; - -struct hl_eq_razwi_hbw_info_regs { - __le32 rr_aw_razwi_hi_reg; - __le32 rr_aw_razwi_lo_reg; - __le32 rr_aw_razwi_id_reg; - __le32 rr_ar_razwi_hi_reg; - __le32 rr_ar_razwi_lo_reg; - __le32 rr_ar_razwi_id_reg; -}; - -/* razwi_happened masks */ -#define RAZWI_HAPPENED_HBW 0x1 -#define RAZWI_HAPPENED_LBW 0x2 -#define RAZWI_HAPPENED_AW 0x4 -#define RAZWI_HAPPENED_AR 0x8 - -struct hl_eq_razwi_info { - __le32 razwi_happened_mask; - union { - struct hl_eq_razwi_lbw_info_regs lbw; - struct hl_eq_razwi_hbw_info_regs hbw; - }; - __le32 pad; -}; - -struct hl_eq_razwi_with_intr_cause { - struct hl_eq_razwi_info razwi_info; - struct hl_eq_intr_cause intr_cause; -}; - -#define HBM_CA_ERR_CMD_LIFO_LEN 8 -#define HBM_RD_ERR_DATA_LIFO_LEN 8 -#define HBM_WR_PAR_CMD_LIFO_LEN 11 - -enum hl_hbm_sei_cause { - /* Command/address parity error event is split into 2 events due to - * size limitation: ODD suffix for odd HBM CK_t cycles and EVEN suffix - * for even HBM CK_t cycles - */ - HBM_SEI_CMD_PARITY_EVEN, - HBM_SEI_CMD_PARITY_ODD, - /* Read errors can be reflected as a combination of SERR/DERR/parity - * errors. Therefore, we define one event for all read error types. - * LKD will perform further proccessing. 
- */ - HBM_SEI_READ_ERR, - HBM_SEI_WRITE_DATA_PARITY_ERR, - HBM_SEI_CATTRIP, - HBM_SEI_MEM_BIST_FAIL, - HBM_SEI_DFI, - HBM_SEI_INV_TEMP_READ_OUT, - HBM_SEI_BIST_FAIL, -}; - -/* Masks for parsing hl_hbm_sei_headr fields */ -#define HBM_ECC_SERR_CNTR_MASK 0xFF -#define HBM_ECC_DERR_CNTR_MASK 0xFF00 -#define HBM_RD_PARITY_CNTR_MASK 0xFF0000 - -/* HBM index and MC index are known by the event_id */ -struct hl_hbm_sei_header { - union { - /* relevant only in case of HBM read error */ - struct { - __u8 ecc_serr_cnt; - __u8 ecc_derr_cnt; - __u8 read_par_cnt; - __u8 reserved; - }; - /* All other cases */ - __le32 cnt; - }; - __u8 sei_cause; /* enum hl_hbm_sei_cause */ - __u8 mc_channel; /* range: 0-3 */ - __u8 mc_pseudo_channel; /* range: 0-7 */ - __u8 is_critical; -}; - -#define HBM_RD_ADDR_SID_SHIFT 0 -#define HBM_RD_ADDR_SID_MASK 0x1 -#define HBM_RD_ADDR_BG_SHIFT 1 -#define HBM_RD_ADDR_BG_MASK 0x6 -#define HBM_RD_ADDR_BA_SHIFT 3 -#define HBM_RD_ADDR_BA_MASK 0x18 -#define HBM_RD_ADDR_COL_SHIFT 5 -#define HBM_RD_ADDR_COL_MASK 0x7E0 -#define HBM_RD_ADDR_ROW_SHIFT 11 -#define HBM_RD_ADDR_ROW_MASK 0x3FFF800 - -struct hbm_rd_addr { - union { - /* bit fields are only for FW use */ - struct { - u32 dbg_rd_err_addr_sid:1; - u32 dbg_rd_err_addr_bg:2; - u32 dbg_rd_err_addr_ba:2; - u32 dbg_rd_err_addr_col:6; - u32 dbg_rd_err_addr_row:15; - u32 reserved:6; - }; - __le32 rd_addr_val; - }; -}; - -#define HBM_RD_ERR_BEAT_SHIFT 2 -/* dbg_rd_err_misc fields: */ -/* Read parity is calculated per DW on every beat */ -#define HBM_RD_ERR_PAR_ERR_BEAT0_SHIFT 0 -#define HBM_RD_ERR_PAR_ERR_BEAT0_MASK 0x3 -#define HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT 8 -#define HBM_RD_ERR_PAR_DATA_BEAT0_MASK 0x300 -/* ECC is calculated per PC on every beat */ -#define HBM_RD_ERR_SERR_BEAT0_SHIFT 16 -#define HBM_RD_ERR_SERR_BEAT0_MASK 0x10000 -#define HBM_RD_ERR_DERR_BEAT0_SHIFT 24 -#define HBM_RD_ERR_DERR_BEAT0_MASK 0x100000 - -struct hl_eq_hbm_sei_read_err_intr_info { - /* DFI_RD_ERR_REP_ADDR */ - struct hbm_rd_addr 
dbg_rd_err_addr; - /* DFI_RD_ERR_REP_ERR */ - union { - struct { - /* bit fields are only for FW use */ - u32 dbg_rd_err_par:8; - u32 dbg_rd_err_par_data:8; - u32 dbg_rd_err_serr:4; - u32 dbg_rd_err_derr:4; - u32 reserved:8; - }; - __le32 dbg_rd_err_misc; - }; - /* DFI_RD_ERR_REP_DM */ - __le32 dbg_rd_err_dm; - /* DFI_RD_ERR_REP_SYNDROME */ - __le32 dbg_rd_err_syndrome; - /* DFI_RD_ERR_REP_DATA */ - __le32 dbg_rd_err_data[HBM_RD_ERR_DATA_LIFO_LEN]; -}; - -struct hl_eq_hbm_sei_ca_par_intr_info { - /* 14 LSBs */ - __le16 dbg_row[HBM_CA_ERR_CMD_LIFO_LEN]; - /* 18 LSBs */ - __le32 dbg_col[HBM_CA_ERR_CMD_LIFO_LEN]; -}; - -#define WR_PAR_LAST_CMD_COL_SHIFT 0 -#define WR_PAR_LAST_CMD_COL_MASK 0x3F -#define WR_PAR_LAST_CMD_BG_SHIFT 6 -#define WR_PAR_LAST_CMD_BG_MASK 0xC0 -#define WR_PAR_LAST_CMD_BA_SHIFT 8 -#define WR_PAR_LAST_CMD_BA_MASK 0x300 -#define WR_PAR_LAST_CMD_SID_SHIFT 10 -#define WR_PAR_LAST_CMD_SID_MASK 0x400 - -/* Row address isn't latched */ -struct hbm_sei_wr_cmd_address { - /* DFI_DERR_LAST_CMD */ - union { - struct { - /* bit fields are only for FW use */ - u32 col:6; - u32 bg:2; - u32 ba:2; - u32 sid:1; - u32 reserved:21; - }; - __le32 dbg_wr_cmd_addr; - }; -}; - -struct hl_eq_hbm_sei_wr_par_intr_info { - /* entry 0: WR command address from the 1st cycle prior to the error - * entry 1: WR command address from the 2nd cycle prior to the error - * and so on... - */ - struct hbm_sei_wr_cmd_address dbg_last_wr_cmds[HBM_WR_PAR_CMD_LIFO_LEN]; - /* derr[0:1] - 1st HBM cycle DERR output - * derr[2:3] - 2nd HBM cycle DERR output - */ - __u8 dbg_derr; - /* extend to reach 8B */ - __u8 pad[3]; -}; - -/* - * this struct represents the following sei causes: - * command parity, ECC double error, ECC single error, dfi error, cattrip, - * temperature read-out, read parity error and write parity error. - * some only use the header while some have extra data. 
- */ -struct hl_eq_hbm_sei_data { - struct hl_hbm_sei_header hdr; - union { - struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_even_info; - struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_odd_info; - struct hl_eq_hbm_sei_read_err_intr_info read_err_info; - struct hl_eq_hbm_sei_wr_par_intr_info wr_parity_info; - }; -}; - -/* Engine/farm arc interrupt type */ -enum hl_engine_arc_interrupt_type { - /* Qman/farm ARC DCCM QUEUE FULL interrupt type */ - ENGINE_ARC_DCCM_QUEUE_FULL_IRQ = 1 -}; - -/* Data structure specifies details of payload of DCCM QUEUE FULL interrupt */ -struct hl_engine_arc_dccm_queue_full_irq { - /* Queue index value which caused DCCM QUEUE FULL */ - __le32 queue_index; - __le32 pad; -}; - -/* Data structure specifies details of QM/FARM ARC interrupt */ -struct hl_eq_engine_arc_intr_data { - /* ARC engine id e.g. DCORE0_TPC0_QM_ARC, DCORE0_TCP1_QM_ARC */ - __le32 engine_id; - __le32 intr_type; /* enum hl_engine_arc_interrupt_type */ - /* More info related to the interrupt e.g. queue index - * incase of DCCM_QUEUE_FULL interrupt. 
- */ - __le64 payload; - __le64 pad[5]; -}; - -#define ADDR_DEC_ADDRESS_COUNT_MAX 4 - -/* Data structure specifies details of ADDR_DEC interrupt */ -struct hl_eq_addr_dec_intr_data { - struct hl_eq_intr_cause intr_cause; - __le64 addr[ADDR_DEC_ADDRESS_COUNT_MAX]; - __u8 addr_cnt; - __u8 pad[7]; -}; - -struct hl_eq_entry { - struct hl_eq_header hdr; - union { - __le64 data_placeholder; - struct hl_eq_ecc_data ecc_data; - struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Obsolete */ - struct hl_eq_sm_sei_data sm_sei_data; - struct cpucp_pkt_sync_err pkt_sync_err; - struct hl_eq_fw_alive fw_alive; - struct hl_eq_intr_cause intr_cause; - struct hl_eq_pcie_drain_ind_data pcie_drain_ind_data; - struct hl_eq_razwi_info razwi_info; - struct hl_eq_razwi_with_intr_cause razwi_with_intr_cause; - struct hl_eq_hbm_sei_data sei_data; /* Gaudi2 HBM */ - struct hl_eq_engine_arc_intr_data arc_data; - struct hl_eq_addr_dec_intr_data addr_dec; - __le64 data[7]; - }; -}; - -#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) - -#define EQ_CTL_READY_SHIFT 31 -#define EQ_CTL_READY_MASK 0x80000000 - -#define EQ_CTL_EVENT_TYPE_SHIFT 16 -#define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000 - -#define EQ_CTL_INDEX_SHIFT 0 -#define EQ_CTL_INDEX_MASK 0x0000FFFF - -enum pq_init_status { - PQ_INIT_STATUS_NA = 0, - PQ_INIT_STATUS_READY_FOR_CP, - PQ_INIT_STATUS_READY_FOR_HOST, - PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI, - PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR, - PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR -}; - -/* - * CpuCP Primary Queue Packets - * - * During normal operation, the host's kernel driver needs to send various - * messages to CpuCP, usually either to SET some value into a H/W periphery or - * to GET the current value of some H/W periphery. For example, SET the - * frequency of MME/TPC and GET the value of the thermal sensor. - * - * These messages can be initiated either by the User application or by the - * host's driver itself, e.g. power management code. 
In either case, the - * communication from the host's driver to CpuCP will *always* be in - * synchronous mode, meaning that the host will send a single message and poll - * until the message was acknowledged and the results are ready (if results are - * needed). - * - * This means that only a single message can be sent at a time and the host's - * driver must wait for its result before sending the next message. Having said - * that, because these are control messages which are sent in a relatively low - * frequency, this limitation seems acceptable. It's important to note that - * in case of multiple devices, messages to different devices *can* be sent - * at the same time. - * - * The message, inputs/outputs (if relevant) and fence object will be located - * on the device DDR at an address that will be determined by the host's driver. - * During device initialization phase, the host will pass to CpuCP that address. - * Most of the message types will contain inputs/outputs inside the message - * itself. The common part of each message will contain the opcode of the - * message (its type) and a field representing a fence object. - * - * When the host's driver wishes to send a message to CPU CP, it will write the - * message contents to the device DDR, clear the fence object and then write to - * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU. - * - * Upon receiving the interrupt (#121), CpuCP will read the message from the - * DDR. In case the message is a SET operation, CpuCP will first perform the - * operation and then write to the fence object on the device DDR. In case the - * message is a GET operation, CpuCP will first fill the results section on the - * device DDR and then write to the fence object. If an error occurred, CpuCP - * will fill the rc field with the right error code. - * - * In the meantime, the host's driver will poll on the fence object. 
Once the - * host sees that the fence object is signaled, it will read the results from - * the device DDR (if relevant) and resume the code execution in the host's - * driver. - * - * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 - * so the value being put by the host's driver matches the value read by CpuCP - * - * Non-QMAN packets should be limited to values 1 through (2^8 - 1) - * - * Detailed description: - * - * CPUCP_PACKET_DISABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU must NOT issue PCI - * transactions (read/write) towards the Host CPU. This also include - * sending MSI-X interrupts. - * This packet is usually sent before the device is moved to D3Hot state. - * - * CPUCP_PACKET_ENABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU is allowed to issue PCI - * transactions towards the Host CPU, including sending MSI-X interrupts. - * This packet is usually send after the device is moved to D0 state. - * - * CPUCP_PACKET_TEMPERATURE_GET - - * Fetch the current temperature / Max / Max Hyst / Critical / - * Critical Hyst of a specified thermal sensor. The packet's - * arguments specify the desired sensor and the field to get. - * - * CPUCP_PACKET_VOLTAGE_GET - - * Fetch the voltage / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * CPUCP_PACKET_CURRENT_GET - - * Fetch the current / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * CPUCP_PACKET_FAN_SPEED_GET - - * Fetch the speed / Max / Min of a specified fan. The packet's - * arguments specify the sensor and type. - * - * CPUCP_PACKET_PWM_GET - - * Fetch the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor and type. - * - * CPUCP_PACKET_PWM_SET - - * Set the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor, type and value. 
- * - * CPUCP_PACKET_FREQUENCY_SET - - * Set the frequency of a specified PLL. The packet's arguments specify - * the PLL and the desired frequency. The actual frequency in the device - * might differ from the requested frequency. - * - * CPUCP_PACKET_FREQUENCY_GET - - * Fetch the frequency of a specified PLL. The packet's arguments specify - * the PLL. - * - * CPUCP_PACKET_LED_SET - - * Set the state of a specified led. The packet's arguments - * specify the led and the desired state. - * - * CPUCP_PACKET_I2C_WR - - * Write 32-bit value to I2C device. The packet's arguments specify the - * I2C bus, address and value. - * - * CPUCP_PACKET_I2C_RD - - * Read 32-bit value from I2C device. The packet's arguments specify the - * I2C bus and address. - * - * CPUCP_PACKET_INFO_GET - - * Fetch information from the device as specified in the packet's - * structure. The host's driver passes the max size it allows the CpuCP to - * write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed - * - * CPUCP_PACKET_UNMASK_RAZWI_IRQ - - * Unmask the given IRQ. The IRQ number is specified in the value field. - * The packet is sent after receiving an interrupt and printing its - * relevant information. - * - * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - - * Unmask the given IRQs. The IRQs numbers are specified in an array right - * after the cpucp_packet structure, where its first element is the array - * length. The packet is sent after a soft reset was done in order to - * handle any interrupts that were sent during the reset process. - * - * CPUCP_PACKET_TEST - - * Test packet for CpuCP connectivity. The CPU will put the fence value - * in the result field. - * - * CPUCP_PACKET_FREQUENCY_CURR_GET - - * Fetch the current frequency of a specified PLL. The packet's arguments - * specify the PLL. - * - * CPUCP_PACKET_MAX_POWER_GET - - * Fetch the maximal power of the device. 
- * - * CPUCP_PACKET_MAX_POWER_SET - - * Set the maximal power of the device. The packet's arguments specify - * the power. - * - * CPUCP_PACKET_EEPROM_DATA_GET - - * Get EEPROM data from the CpuCP kernel. The buffer is specified in the - * addr field. The CPU will put the returned data size in the result - * field. In addition, the host's driver passes the max size it allows the - * CpuCP to write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * CPUCP_PACKET_NIC_INFO_GET - - * Fetch information from the device regarding the NIC. the host's driver - * passes the max size it allows the CpuCP to write to the structure, to - * prevent data corruption in case of mismatched driver/FW versions. - * - * CPUCP_PACKET_TEMPERATURE_SET - - * Set the value of the offset property of a specified thermal sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * CPUCP_PACKET_VOLTAGE_SET - - * Trigger the reset_history property of a specified voltage sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * CPUCP_PACKET_CURRENT_SET - - * Trigger the reset_history property of a specified current sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * CPUCP_PACKET_PCIE_THROUGHPUT_GET - - * Get throughput of PCIe. - * The packet's arguments specify the transaction direction (TX/RX). - * The window measurement is 10[msec], and the return value is in KB/sec. - * - * CPUCP_PACKET_PCIE_REPLAY_CNT_GET - * Replay count measures number of "replay" events, which is basicly - * number of retries done by PCIe. - * - * CPUCP_PACKET_TOTAL_ENERGY_GET - - * Total Energy is measurement of energy from the time FW Linux - * is loaded. It is calculated by multiplying the average power - * by time (passed from armcp start). The units are in MilliJouls. 
- * - * CPUCP_PACKET_PLL_INFO_GET - - * Fetch frequencies of PLL from the required PLL IP. - * The packet's arguments specify the device PLL type - * Pll type is the PLL from device pll_index enum. - * The result is composed of 4 outputs, each is 16-bit - * frequency in MHz. - * - * CPUCP_PACKET_POWER_GET - - * Fetch the present power consumption of the device (Current * Voltage). - * - * CPUCP_PACKET_NIC_PFC_SET - - * Enable/Disable the NIC PFC feature. The packet's arguments specify the - * NIC port, relevant lanes to configure and one bit indication for - * enable/disable. - * - * CPUCP_PACKET_NIC_FAULT_GET - - * Fetch the current indication for local/remote faults from the NIC MAC. - * The result is 32-bit value of the relevant register. - * - * CPUCP_PACKET_NIC_LPBK_SET - - * Enable/Disable the MAC loopback feature. The packet's arguments specify - * the NIC port, relevant lanes to configure and one bit indication for - * enable/disable. - * - * CPUCP_PACKET_NIC_MAC_INIT - - * Configure the NIC MAC channels. The packet's arguments specify the - * NIC port and the speed. - * - * CPUCP_PACKET_MSI_INFO_SET - - * set the index number for each supported msi type going from - * host to device - * - * CPUCP_PACKET_NIC_XPCS91_REGS_GET - - * Fetch the un/correctable counters values from the NIC MAC. - * - * CPUCP_PACKET_NIC_STAT_REGS_GET - - * Fetch various NIC MAC counters from the NIC STAT. - * - * CPUCP_PACKET_NIC_STAT_REGS_CLR - - * Clear the various NIC MAC counters in the NIC STAT. - * - * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET - - * Fetch all NIC MAC counters from the NIC STAT. - * - * CPUCP_PACKET_IS_IDLE_CHECK - - * Check if the device is IDLE in regard to the DMA/compute engines - * and QMANs. The f/w will return a bitmask where each bit represents - * a different engine or QMAN according to enum cpucp_idle_mask. - * The bit will be 1 if the engine is NOT idle. 
- * - * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET - - * Fetch all HBM replaced-rows and prending to be replaced rows data. - * - * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS - - * Fetch status of HBM rows pending replacement and need a reboot to - * be replaced. - * - * CPUCP_PACKET_POWER_SET - - * Resets power history of device to 0 - * - * CPUCP_PACKET_ENGINE_CORE_ASID_SET - - * Packet to perform engine core ASID configuration - * - * CPUCP_PACKET_SEC_ATTEST_GET - - * Get the attestaion data that is collected during various stages of the - * boot sequence. the attestation data is also hashed with some unique - * number (nonce) provided by the host to prevent replay attacks. - * public key and certificate also provided as part of the FW response. - * - * CPUCP_PACKET_MONITOR_DUMP_GET - - * Get monitors registers dump from the CpuCP kernel. - * The CPU will put the registers dump in the a buffer allocated by the driver - * which address is passed via the CpuCp packet. In addition, the host's driver - * passes the max size it allows the CpuCP to write to the structure, to prevent - * data corruption in case of mismatched driver/FW versions. - * Obsolete. - * - * CPUCP_PACKET_GENERIC_PASSTHROUGH - - * Generic opcode for all firmware info that is only passed to host - * through the LKD, without getting parsed there. - * - * CPUCP_PACKET_ACTIVE_STATUS_SET - - * LKD sends FW indication whether device is free or in use, this indication is reported - * also to the BMC. - * - * CPUCP_PACKET_REGISTER_INTERRUPTS - - * Packet to register interrupts indicating LKD is ready to receive events from FW. - * - * CPUCP_PACKET_SOFT_RESET - - * Packet to perform soft-reset. 
- */ - -enum cpucp_packet_id { - CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ - CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ - CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */ - CPUCP_PACKET_VOLTAGE_GET, /* sysfs */ - CPUCP_PACKET_CURRENT_GET, /* sysfs */ - CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */ - CPUCP_PACKET_PWM_GET, /* sysfs */ - CPUCP_PACKET_PWM_SET, /* sysfs */ - CPUCP_PACKET_FREQUENCY_SET, /* sysfs */ - CPUCP_PACKET_FREQUENCY_GET, /* sysfs */ - CPUCP_PACKET_LED_SET, /* debugfs */ - CPUCP_PACKET_I2C_WR, /* debugfs */ - CPUCP_PACKET_I2C_RD, /* debugfs */ - CPUCP_PACKET_INFO_GET, /* IOCTL */ - CPUCP_PACKET_FLASH_PROGRAM_REMOVED, - CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ - CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ - CPUCP_PACKET_TEST, /* internal */ - CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ - CPUCP_PACKET_MAX_POWER_GET, /* sysfs */ - CPUCP_PACKET_MAX_POWER_SET, /* sysfs */ - CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */ - CPUCP_PACKET_NIC_INFO_GET, /* internal */ - CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */ - CPUCP_PACKET_VOLTAGE_SET, /* sysfs */ - CPUCP_PACKET_CURRENT_SET, /* sysfs */ - CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ - CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ - CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ - CPUCP_PACKET_PLL_INFO_GET, /* internal */ - CPUCP_PACKET_NIC_STATUS, /* internal */ - CPUCP_PACKET_POWER_GET, /* internal */ - CPUCP_PACKET_NIC_PFC_SET, /* internal */ - CPUCP_PACKET_NIC_FAULT_GET, /* internal */ - CPUCP_PACKET_NIC_LPBK_SET, /* internal */ - CPUCP_PACKET_NIC_MAC_CFG, /* internal */ - CPUCP_PACKET_MSI_INFO_SET, /* internal */ - CPUCP_PACKET_NIC_XPCS91_REGS_GET, /* internal */ - CPUCP_PACKET_NIC_STAT_REGS_GET, /* internal */ - CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */ - CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */ - CPUCP_PACKET_IS_IDLE_CHECK, /* internal */ - CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */ - CPUCP_PACKET_HBM_PENDING_ROWS_STATUS, /* internal */ - 
CPUCP_PACKET_POWER_SET, /* internal */ - CPUCP_PACKET_RESERVED, /* not used */ - CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ - CPUCP_PACKET_RESERVED2, /* not used */ - CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ - CPUCP_PACKET_RESERVED3, /* not used */ - CPUCP_PACKET_RESERVED4, /* not used */ - CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ - CPUCP_PACKET_RESERVED5, /* not used */ - CPUCP_PACKET_RESERVED6, /* not used */ - CPUCP_PACKET_RESERVED7, /* not used */ - CPUCP_PACKET_GENERIC_PASSTHROUGH, /* IOCTL */ - CPUCP_PACKET_RESERVED8, /* not used */ - CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ - CPUCP_PACKET_RESERVED9, /* not used */ - CPUCP_PACKET_RESERVED10, /* not used */ - CPUCP_PACKET_RESERVED11, /* not used */ - CPUCP_PACKET_RESERVED12, /* internal */ - CPUCP_PACKET_REGISTER_INTERRUPTS, /* internal */ - CPUCP_PACKET_SOFT_RESET, /* internal */ - CPUCP_PACKET_ID_MAX /* must be last */ -}; - -#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 - -#define CPUCP_PKT_CTL_RC_SHIFT 12 -#define CPUCP_PKT_CTL_RC_MASK 0x0000F000 - -#define CPUCP_PKT_CTL_OPCODE_SHIFT 16 -#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 - -#define CPUCP_PKT_RES_PLL_OUT0_SHIFT 0 -#define CPUCP_PKT_RES_PLL_OUT0_MASK 0x000000000000FFFFull -#define CPUCP_PKT_RES_PLL_OUT1_SHIFT 16 -#define CPUCP_PKT_RES_PLL_OUT1_MASK 0x00000000FFFF0000ull -#define CPUCP_PKT_RES_PLL_OUT2_SHIFT 32 -#define CPUCP_PKT_RES_PLL_OUT2_MASK 0x0000FFFF00000000ull -#define CPUCP_PKT_RES_PLL_OUT3_SHIFT 48 -#define CPUCP_PKT_RES_PLL_OUT3_MASK 0xFFFF000000000000ull - -#define CPUCP_PKT_RES_EEPROM_OUT0_SHIFT 0 -#define CPUCP_PKT_RES_EEPROM_OUT0_MASK 0x000000000000FFFFull -#define CPUCP_PKT_RES_EEPROM_OUT1_SHIFT 16 -#define CPUCP_PKT_RES_EEPROM_OUT1_MASK 0x0000000000FF0000ull - -#define CPUCP_PKT_VAL_PFC_IN1_SHIFT 0 -#define CPUCP_PKT_VAL_PFC_IN1_MASK 0x0000000000000001ull -#define CPUCP_PKT_VAL_PFC_IN2_SHIFT 1 -#define CPUCP_PKT_VAL_PFC_IN2_MASK 0x000000000000001Eull - -#define CPUCP_PKT_VAL_LPBK_IN1_SHIFT 0 -#define 
CPUCP_PKT_VAL_LPBK_IN1_MASK 0x0000000000000001ull -#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1 -#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull - -#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT 0 -#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull -#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT 1 -#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull - -/* heartbeat status bits */ -#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0 -#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001 - -struct cpucp_packet { - union { - __le64 value; /* For SET packets */ - __le64 result; /* For GET packets */ - __le64 addr; /* For PQ */ - }; - - __le32 ctl; - - __le32 fence; /* Signal to host that message is completed */ - - union { - struct {/* For temperature/current/voltage/fan/pwm get/set */ - __le16 sensor_index; - __le16 type; - }; - - struct { /* For I2C read/write */ - __u8 i2c_bus; - __u8 i2c_addr; - __u8 i2c_reg; - /* - * In legacy implemetations, i2c_len was not present, - * was unused and just added as pad. - * So if i2c_len is 0, it is treated as legacy - * and r/w 1 Byte, else if i2c_len is specified, - * its treated as new multibyte r/w support. - */ - __u8 i2c_len; - }; - - struct {/* For PLL info fetch */ - __le16 pll_type; - /* TODO pll_reg is kept temporary before removal */ - __le16 pll_reg; - }; - - /* For any general request */ - __le32 index; - - /* For frequency get/set */ - __le32 pll_index; - - /* For led set */ - __le32 led_index; - - /* For get CpuCP info/EEPROM data/NIC info */ - __le32 data_max_size; - - /* - * For any general status bitmask. Shall be used whenever the - * result cannot be used to hold general purpose data. 
- */ - __le32 status_mask; - - /* random, used once number, for security packets */ - __le32 nonce; - }; - - union { - /* For NIC requests */ - __le32 port_index; - - /* For Generic packet sub index */ - __le32 pkt_subidx; - }; -}; - -struct cpucp_unmask_irq_arr_packet { - struct cpucp_packet cpucp_pkt; - __le32 length; - __le32 irqs[]; -}; - -struct cpucp_nic_status_packet { - struct cpucp_packet cpucp_pkt; - __le32 length; - __le32 data[]; -}; - -struct cpucp_array_data_packet { - struct cpucp_packet cpucp_pkt; - __le32 length; - __le32 data[]; -}; - -enum cpucp_led_index { - CPUCP_LED0_INDEX = 0, - CPUCP_LED1_INDEX, - CPUCP_LED2_INDEX, - CPUCP_LED_MAX_INDEX = CPUCP_LED2_INDEX -}; - -/* - * enum cpucp_packet_rc - Error return code - * @cpucp_packet_success -> in case of success. - * @cpucp_packet_invalid -> this is to support first generation platforms. - * @cpucp_packet_fault -> in case of processing error like failing to - * get device binding or semaphore etc. - * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. - * @cpucp_packet_invalid_params -> when checking parameter like length of buffer - * or attribute value etc. - * @cpucp_packet_rc_max -> It indicates size of enum so should be at last. 
- */ -enum cpucp_packet_rc { - cpucp_packet_success, - cpucp_packet_invalid, - cpucp_packet_fault, - cpucp_packet_invalid_pkt, - cpucp_packet_invalid_params, - cpucp_packet_rc_max -}; - -/* - * cpucp_temp_type should adhere to hwmon_temp_attributes - * defined in Linux kernel hwmon.h file - */ -enum cpucp_temp_type { - cpucp_temp_input, - cpucp_temp_min = 4, - cpucp_temp_min_hyst, - cpucp_temp_max = 6, - cpucp_temp_max_hyst, - cpucp_temp_crit, - cpucp_temp_crit_hyst, - cpucp_temp_offset = 19, - cpucp_temp_lowest = 21, - cpucp_temp_highest = 22, - cpucp_temp_reset_history = 23, - cpucp_temp_warn = 24, - cpucp_temp_max_crit = 25, - cpucp_temp_max_warn = 26, -}; - -enum cpucp_in_attributes { - cpucp_in_input, - cpucp_in_min, - cpucp_in_max, - cpucp_in_lowest = 6, - cpucp_in_highest = 7, - cpucp_in_reset_history, - cpucp_in_intr_alarm_a, - cpucp_in_intr_alarm_b, -}; - -enum cpucp_curr_attributes { - cpucp_curr_input, - cpucp_curr_min, - cpucp_curr_max, - cpucp_curr_lowest = 6, - cpucp_curr_highest = 7, - cpucp_curr_reset_history -}; - -enum cpucp_fan_attributes { - cpucp_fan_input, - cpucp_fan_min = 2, - cpucp_fan_max -}; - -enum cpucp_pwm_attributes { - cpucp_pwm_input, - cpucp_pwm_enable -}; - -enum cpucp_pcie_throughput_attributes { - cpucp_pcie_throughput_tx, - cpucp_pcie_throughput_rx -}; - -/* TODO temporary kept before removal */ -enum cpucp_pll_reg_attributes { - cpucp_pll_nr_reg, - cpucp_pll_nf_reg, - cpucp_pll_od_reg, - cpucp_pll_div_factor_reg, - cpucp_pll_div_sel_reg -}; - -/* TODO temporary kept before removal */ -enum cpucp_pll_type_attributes { - cpucp_pll_cpu, - cpucp_pll_pci, -}; - -/* - * cpucp_power_type aligns with hwmon_power_attributes - * defined in Linux kernel hwmon.h file - */ -enum cpucp_power_type { - CPUCP_POWER_INPUT = 8, - CPUCP_POWER_INPUT_HIGHEST = 9, - CPUCP_POWER_RESET_INPUT_HISTORY = 11 -}; - -/* - * MSI type enumeration table for all ASICs and future SW versions. 
- * For future ASIC-LKD compatibility, we can only add new enumerations. - * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES). - * Changing the order of entries or removing entries is not allowed. - */ -enum cpucp_msi_type { - CPUCP_EVENT_QUEUE_MSI_TYPE, - CPUCP_NIC_PORT1_MSI_TYPE, - CPUCP_NIC_PORT3_MSI_TYPE, - CPUCP_NIC_PORT5_MSI_TYPE, - CPUCP_NIC_PORT7_MSI_TYPE, - CPUCP_NIC_PORT9_MSI_TYPE, - CPUCP_NUM_OF_MSI_TYPES -}; - -/* - * PLL enumeration table used for all ASICs and future SW versions. - * For future ASIC-LKD compatibility, we can only add new enumerations. - * at the end of the table. - * Changing the order of entries or removing entries is not allowed. - */ -enum pll_index { - CPU_PLL = 0, - PCI_PLL = 1, - NIC_PLL = 2, - DMA_PLL = 3, - MESH_PLL = 4, - MME_PLL = 5, - TPC_PLL = 6, - IF_PLL = 7, - SRAM_PLL = 8, - NS_PLL = 9, - HBM_PLL = 10, - MSS_PLL = 11, - DDR_PLL = 12, - VID_PLL = 13, - BANK_PLL = 14, - MMU_PLL = 15, - IC_PLL = 16, - MC_PLL = 17, - EMMC_PLL = 18, - D2D_PLL = 19, - CS_PLL = 20, - C2C_PLL = 21, - NCH_PLL = 22, - C2M_PLL = 23, - PLL_MAX -}; - -enum rl_index { - TPC_RL = 0, - MME_RL, - EDMA_RL, -}; - -enum pvt_index { - PVT_SW, - PVT_SE, - PVT_NW, - PVT_NE -}; - -/* Event Queue Packets */ - -struct eq_generic_event { - __le64 data[7]; -}; - -/* - * CpuCP info - */ - -#define CARD_NAME_MAX_LEN 16 -#define CPUCP_MAX_SENSORS 128 -#define CPUCP_MAX_NICS 128 -#define CPUCP_LANES_PER_NIC 4 -#define CPUCP_NIC_QSFP_EEPROM_MAX_LEN 1024 -#define CPUCP_MAX_NIC_LANES (CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC) -#define CPUCP_NIC_MASK_ARR_LEN ((CPUCP_MAX_NICS + 63) / 64) -#define CPUCP_NIC_POLARITY_ARR_LEN ((CPUCP_MAX_NIC_LANES + 63) / 64) -#define CPUCP_HBM_ROW_REPLACE_MAX 32 - -struct cpucp_sensor { - __le32 type; - __le32 flags; -}; - -/** - * struct cpucp_card_types - ASIC card type. - * @cpucp_card_type_pci: PCI card. - * @cpucp_card_type_pmc: PCI Mezzanine Card. 
- */ -enum cpucp_card_types { - cpucp_card_type_pci, - cpucp_card_type_pmc -}; - -#define CPUCP_SEC_CONF_ENABLED_SHIFT 0 -#define CPUCP_SEC_CONF_ENABLED_MASK 0x00000001 - -#define CPUCP_SEC_CONF_FLASH_WP_SHIFT 1 -#define CPUCP_SEC_CONF_FLASH_WP_MASK 0x00000002 - -#define CPUCP_SEC_CONF_EEPROM_WP_SHIFT 2 -#define CPUCP_SEC_CONF_EEPROM_WP_MASK 0x00000004 - -/** - * struct cpucp_security_info - Security information. - * @config: configuration bit field - * @keys_num: number of stored keys - * @revoked_keys: revoked keys bit field - * @min_svn: minimal security version - */ -struct cpucp_security_info { - __u8 config; - __u8 keys_num; - __u8 revoked_keys; - __u8 min_svn; -}; - -/** - * struct cpucp_info - Info from CpuCP that is necessary to the host's driver - * @sensors: available sensors description. - * @kernel_version: CpuCP linux kernel version. - * @reserved: reserved field. - * @card_type: card configuration type. - * @card_location: in a server, each card has different connections topology - * depending on its location (relevant for PMC card type) - * @cpld_version: CPLD programmed F/W version. - * @infineon_version: Infineon main DC-DC version. - * @fuse_version: silicon production FUSE information. - * @thermal_version: thermald S/W version. - * @cpucp_version: CpuCP S/W version. - * @infineon_second_stage_version: Infineon 2nd stage DC-DC version. - * @dram_size: available DRAM size. - * @card_name: card name that will be displayed in HWMON subsystem on the host - * @tpc_binning_mask: TPC binning mask, 1 bit per TPC instance - * (0 = functional, 1 = binned) - * @decoder_binning_mask: Decoder binning mask, 1 bit per decoder instance - * (0 = functional, 1 = binned), maximum 1 per dcore - * @sram_binning: Categorize SRAM functionality - * (0 = fully functional, 1 = lower-half is not functional, - * 2 = upper-half is not functional) - * @sec_info: security information - * @pll_map: Bit map of supported PLLs for current ASIC version. 
- * @mme_binning_mask: MME binning mask, - * bits [0:6] <==> dcore0 mme fma - * bits [7:13] <==> dcore1 mme fma - * bits [14:20] <==> dcore0 mme ima - * bits [21:27] <==> dcore1 mme ima - * For each group, if the 6th bit is set then first 5 bits - * represent the col's idx [0-31], otherwise these bits are - * ignored, and col idx 32 is binned. 7th bit is don't care. - * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance - * (0 = functional 1 = binned) - * @memory_repair_flag: eFuse flag indicating memory repair - * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance - * (0 = functional 1 = binned) - * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance - * (0 = functional 1 = binned) - * @interposer_version: Interposer version programmed in eFuse - * @substrate_version: Substrate version programmed in eFuse - * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. - * @fw_os_version: Firmware OS Version - */ -struct cpucp_info { - struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; - __u8 kernel_version[VERSION_MAX_LEN]; - __le32 reserved; - __le32 card_type; - __le32 card_location; - __le32 cpld_version; - __le32 infineon_version; - __u8 fuse_version[VERSION_MAX_LEN]; - __u8 thermal_version[VERSION_MAX_LEN]; - __u8 cpucp_version[VERSION_MAX_LEN]; - __le32 infineon_second_stage_version; - __le64 dram_size; - char card_name[CARD_NAME_MAX_LEN]; - __le64 tpc_binning_mask; - __le64 decoder_binning_mask; - __u8 sram_binning; - __u8 dram_binning_mask; - __u8 memory_repair_flag; - __u8 edma_binning_mask; - __u8 xbar_binning_mask; - __u8 interposer_version; - __u8 substrate_version; - __u8 reserved2; - struct cpucp_security_info sec_info; - __le32 fw_hbm_region_size; - __u8 pll_map[PLL_MAP_LEN]; - __le64 mme_binning_mask; - __u8 fw_os_version[VERSION_MAX_LEN]; -}; - -struct cpucp_mac_addr { - __u8 mac_addr[ETH_ALEN]; -}; - -enum cpucp_serdes_type { - TYPE_1_SERDES_TYPE, - TYPE_2_SERDES_TYPE, - HLS1_SERDES_TYPE, - HLS1H_SERDES_TYPE, 
- HLS2_SERDES_TYPE, - HLS2_TYPE_1_SERDES_TYPE, - MAX_NUM_SERDES_TYPE, /* number of types */ - UNKNOWN_SERDES_TYPE = 0xFFFF /* serdes_type is u16 */ -}; - -struct cpucp_nic_info { - struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS]; - __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN]; - __le64 pol_tx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; - __le64 pol_rx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; - __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN]; - __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN]; - __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN]; - __le16 serdes_type; /* enum cpucp_serdes_type */ - __le16 tx_swap_map[CPUCP_MAX_NICS]; - __u8 reserved[6]; -}; - -#define PAGE_DISCARD_MAX 64 - -struct page_discard_info { - __u8 num_entries; - __u8 reserved[7]; - __le32 mmu_page_idx[PAGE_DISCARD_MAX]; -}; - -/* - * struct frac_val - fracture value represented by "integer.frac". - * @integer: the integer part of the fracture value; - * @frac: the fracture part of the fracture value. - */ -struct frac_val { - union { - struct { - __le16 integer; - __le16 frac; - }; - __le32 val; - }; -}; - -/* - * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp". - * @integer: the integer part of the SER value; - * @exp: the exponent part of the SER value. - */ -struct ser_val { - __le16 integer; - __le16 exp; -}; - -/* - * struct cpucp_nic_status - describes the status of a NIC port. - * @port: NIC port index. - * @bad_format_cnt: e.g. CRC. - * @responder_out_of_sequence_psn_cnt: e.g NAK. - * @high_ber_reinit_cnt: link reinit due to high BER. - * @correctable_err_cnt: e.g. bit-flip. - * @uncorrectable_err_cnt: e.g. MAC errors. - * @retraining_cnt: re-training counter. - * @up: is port up. - * @pcs_link: has PCS link. - * @phy_ready: is PHY ready. - * @auto_neg: is Autoneg enabled. - * @timeout_retransmission_cnt: timeout retransmission events. - * @high_ber_cnt: high ber events. - * @pre_fec_ser: pre FEC SER value. - * @post_fec_ser: post FEC SER value. 
- * @throughput: measured throughput. - * @latency: measured latency. - */ -struct cpucp_nic_status { - __le32 port; - __le32 bad_format_cnt; - __le32 responder_out_of_sequence_psn_cnt; - __le32 high_ber_reinit; - __le32 correctable_err_cnt; - __le32 uncorrectable_err_cnt; - __le32 retraining_cnt; - __u8 up; - __u8 pcs_link; - __u8 phy_ready; - __u8 auto_neg; - __le32 timeout_retransmission_cnt; - __le32 high_ber_cnt; - struct ser_val pre_fec_ser; - struct ser_val post_fec_ser; - struct frac_val bandwidth; - struct frac_val lat; -}; - -enum cpucp_hbm_row_replace_cause { - REPLACE_CAUSE_DOUBLE_ECC_ERR, - REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR, -}; - -struct cpucp_hbm_row_info { - __u8 hbm_idx; - __u8 pc; - __u8 sid; - __u8 bank_idx; - __le16 row_addr; - __u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */ - __u8 pad; -}; - -struct cpucp_hbm_row_replaced_rows_info { - __le16 num_replaced_rows; - __u8 pad[6]; - struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX]; -}; - -enum cpu_reset_status { - CPU_RST_STATUS_NA = 0, - CPU_RST_STATUS_SOFT_RST_DONE = 1, -}; - -#define SEC_PCR_DATA_BUF_SZ 256 -#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ -#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ -#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ -#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ - -/* - * struct cpucp_sec_attest_info - attestation report of the boot - * @pcr_data: raw values of the PCR registers - * @pcr_num_reg: number of PCR registers in the pcr_data array - * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) - * @nonce: number only used once. random number provided by host. this also - * passed to the quote command as a qualifying data. 
- * @pcr_quote_len: length of the attestation quote data (bytes) - * @pcr_quote: attestation report data structure - * @quote_sig_len: length of the attestation report signature (bytes) - * @quote_sig: signature structure of the attestation report - * @pub_data_len: length of the public data (bytes) - * @public_data: public key for the signed attestation - * (outPublic + name + qualifiedName) - * @certificate_len: length of the certificate (bytes) - * @certificate: certificate for the attestation signing key - */ -struct cpucp_sec_attest_info { - __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; - __u8 pcr_num_reg; - __u8 pcr_reg_len; - __le16 pad0; - __le32 nonce; - __le16 pcr_quote_len; - __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; - __u8 quote_sig_len; - __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; - __le16 pub_data_len; - __u8 public_data[SEC_PUB_DATA_BUF_SZ]; - __le16 certificate_len; - __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; -}; - -/* - * struct cpucp_dev_info_signed - device information signed by a secured device - * @info: device information structure as defined above - * @nonce: number only used once. random number provided by host. this number is - * hashed and signed along with the device information. - * @info_sig_len: length of the attestation signature (bytes) - * @info_sig: signature of the info + nonce data. - * @pub_data_len: length of the public data (bytes) - * @public_data: public key info signed info data - * (outPublic + name + qualifiedName) - * @certificate_len: length of the certificate (bytes) - * @certificate: certificate for the signing key - */ -struct cpucp_dev_info_signed { - struct cpucp_info info; /* assumed to be 64bit aligned */ - __le32 nonce; - __le32 pad0; - __u8 info_sig_len; - __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; - __le16 pub_data_len; - __u8 public_data[SEC_PUB_DATA_BUF_SZ]; - __le16 certificate_len; - __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; -}; - -#define DCORE_MON_REGS_SZ 512 -/* - * struct dcore_monitor_regs_data - DCORE monitor regs data. 
- * the structure follows sync manager block layout. Obsolete. - * @mon_pay_addrl: array of payload address low bits. - * @mon_pay_addrh: array of payload address high bits. - * @mon_pay_data: array of payload data. - * @mon_arm: array of monitor arm. - * @mon_status: array of monitor status. - */ -struct dcore_monitor_regs_data { - __le32 mon_pay_addrl[DCORE_MON_REGS_SZ]; - __le32 mon_pay_addrh[DCORE_MON_REGS_SZ]; - __le32 mon_pay_data[DCORE_MON_REGS_SZ]; - __le32 mon_arm[DCORE_MON_REGS_SZ]; - __le32 mon_status[DCORE_MON_REGS_SZ]; -}; - -/* contains SM data for each SYNC_MNGR (Obsolete) */ -struct cpucp_monitor_dump { - struct dcore_monitor_regs_data sync_mngr_w_s; - struct dcore_monitor_regs_data sync_mngr_e_s; - struct dcore_monitor_regs_data sync_mngr_w_n; - struct dcore_monitor_regs_data sync_mngr_e_n; -}; - -/* - * The Type of the generic request (and other input arguments) will be fetched from user by reading - * from "pkt_subidx" field in struct cpucp_packet. - * - * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. - */ -enum hl_passthrough_type { - HL_PASSTHROUGH_VERSIONS, -}; - -#endif /* CPUCP_IF_H */ diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/drivers/accel/habanalabs/include/common/hl_boot_if.h deleted file mode 100644 index 7de8a5786a36..000000000000 --- a/drivers/accel/habanalabs/include/common/hl_boot_if.h +++ /dev/null @@ -1,790 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2018-2020 HabanaLabs, Ltd. - * All Rights Reserved. 
- * - */ - -#ifndef HL_BOOT_IF_H -#define HL_BOOT_IF_H - -#define LKD_HARD_RESET_MAGIC 0xED7BD694 /* deprecated - do not use */ -#define HL_POWER9_HOST_MAGIC 0x1DA30009 - -#define BOOT_FIT_SRAM_OFFSET 0x200000 - -#define VERSION_MAX_LEN 128 - -enum cpu_boot_err { - CPU_BOOT_ERR_DRAM_INIT_FAIL = 0, - CPU_BOOT_ERR_FIT_CORRUPTED = 1, - CPU_BOOT_ERR_TS_INIT_FAIL = 2, - CPU_BOOT_ERR_DRAM_SKIPPED = 3, - CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4, - CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5, - CPU_BOOT_ERR_NIC_FW_FAIL = 6, - CPU_BOOT_ERR_SECURITY_NOT_RDY = 7, - CPU_BOOT_ERR_SECURITY_FAIL = 8, - CPU_BOOT_ERR_EFUSE_FAIL = 9, - CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10, - CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11, - CPU_BOOT_ERR_PLL_FAIL = 12, - CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13, - CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, - CPU_BOOT_ERR_BINNING_FAIL = 19, - CPU_BOOT_ERR_TPM_FAIL = 20, - CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, - CPU_BOOT_ERR_EEPROM_FAIL = 22, - CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL = 23, - CPU_BOOT_ERR_ENABLED = 31, - CPU_BOOT_ERR_SCND_EN = 63, - CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ -}; - -/* - * Mask for fatal failures - * This mask contains all possible fatal failures, and a dynamic code - * will clear the non-relevant ones. - */ -#define CPU_BOOT_ERR_FATAL_MASK \ - ((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) | \ - (1 << CPU_BOOT_ERR_PLL_FAIL) | \ - (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) | \ - (1 << CPU_BOOT_ERR_BINNING_FAIL) | \ - (1 << CPU_BOOT_ERR_DRAM_SKIPPED) | \ - (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) | \ - (1 << CPU_BOOT_ERR_EEPROM_FAIL)) - -/* - * CPU error bits in BOOT_ERROR registers - * - * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed. - * DRAM is not reliable to use. - * - * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the - * image provided by the host has failed. - * - * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed. - * Boot continues as usual, but keep in - * mind this is a warning. 
- * - * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped. - * Skipping DRAM initialization has been - * requested (e.g. strap, command, etc.) - * and FW skipped the DRAM initialization. - * Host can initialize the DRAM. - * - * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped. - * Meaning the BMC data might not be - * available until reset. - * - * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready. - * BMC has not provided the NIC data yet. - * Once provided this bit will be cleared. - * - * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed. - * The NIC FW loading and initialization - * failed. This means NICs are not usable. - * - * CPU_BOOT_ERR0_SECURITY_NOT_RDY Chip security initialization has been - * started, but is not ready yet - chip - * cannot be accessed. - * - * CPU_BOOT_ERR0_SECURITY_FAIL Security related tasks have failed. - * The tasks are security init (root of - * trust), boot authentication (chain of - * trust), data packets authentication. - * - * CPU_BOOT_ERR0_EFUSE_FAIL Reading from eFuse failed. - * The PCI device ID might be wrong. - * - * CPU_BOOT_ERR0_PRI_IMG_VER_FAIL Verification of primary image failed. - * It mean that ppboot checksum - * verification for the preboot primary - * image has failed to match expected - * checksum. Trying to program image again - * might solve this. - * - * CPU_BOOT_ERR0_SEC_IMG_VER_FAIL Verification of secondary image failed. - * It mean that ppboot checksum - * verification for the preboot secondary - * image has failed to match expected - * checksum. Trying to program image again - * might solve this. - * - * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one - * of the PLLs remains in REF_CLK - * - * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support - * should be contacted. - * - * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR Critical error was detected during - * the execution of ppboot or preboot. - * for example: stack overflow. 
- * - * CPU_BOOT_ERR0_BINNING_FAIL Binning settings failed, meaning - * malfunctioning components might still be - * in use. - * - * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed. - * - * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature - * sensor. - * - * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults - * are used. - * - * CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL Failed scrubbing the Engines/ARCFarm - * memories. Boot disabled until reset. - * - * CPU_BOOT_ERR0_ENABLED Error registers enabled. - * This is a main indication that the - * running FW populates the error - * registers. Meaning the error bits are - * not garbage, but actual error statuses. - */ -#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) -#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << CPU_BOOT_ERR_FIT_CORRUPTED) -#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << CPU_BOOT_ERR_TS_INIT_FAIL) -#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << CPU_BOOT_ERR_DRAM_SKIPPED) -#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED) -#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY) -#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << CPU_BOOT_ERR_NIC_FW_FAIL) -#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << CPU_BOOT_ERR_SECURITY_NOT_RDY) -#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << CPU_BOOT_ERR_SECURITY_FAIL) -#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << CPU_BOOT_ERR_EFUSE_FAIL) -#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL) -#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL) -#define CPU_BOOT_ERR0_PLL_FAIL (1 << CPU_BOOT_ERR_PLL_FAIL) -#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) -#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) -#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) -#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) -#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << 
CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) -#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) -#define CPU_BOOT_ERR0_ENG_ARC_MEM_SCRUB_FAIL (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) -#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) -#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) - -enum cpu_boot_dev_sts { - CPU_BOOT_DEV_STS_SECURITY_EN = 0, - CPU_BOOT_DEV_STS_DEBUG_EN = 1, - CPU_BOOT_DEV_STS_WATCHDOG_EN = 2, - CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3, - CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4, - CPU_BOOT_DEV_STS_E2E_CRED_EN = 5, - CPU_BOOT_DEV_STS_HBM_CRED_EN = 6, - CPU_BOOT_DEV_STS_RL_EN = 7, - CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8, - CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9, - CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10, - CPU_BOOT_DEV_STS_PLL_INFO_EN = 11, - CPU_BOOT_DEV_STS_SP_SRAM_EN = 12, - CPU_BOOT_DEV_STS_CLK_GATE_EN = 13, - CPU_BOOT_DEV_STS_HBM_ECC_EN = 14, - CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15, - CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16, - CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17, - CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18, - CPU_BOOT_DEV_STS_DYN_PLL_EN = 19, - CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20, - CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21, - CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22, - CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23, - CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24, - CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25, - CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26, - CPU_BOOT_DEV_STS_ENABLED = 31, - CPU_BOOT_DEV_STS_SCND_EN = 63, - CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */ -}; - -/* - * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers - * - * CPU_BOOT_DEV_STS0_SECURITY_EN Security is Enabled. - * This is an indication for security - * enabled in FW, which means that - * all conditions for security are met: - * device is indicated as security enabled, - * registers are protected, and device - * uses keys for image verification. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_DEBUG_EN Debug is enabled. - * Enabled when JTAG or DEBUG is enabled - * in FW. 
- * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_WATCHDOG_EN Watchdog is enabled. - * Watchdog is enabled in FW. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_DRAM_INIT_EN DRAM initialization is enabled. - * DRAM initialization has been done in FW. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_BMC_WAIT_EN Waiting for BMC data enabled. - * If set, it means that during boot, - * FW waited for BMC data. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_E2E_CRED_EN E2E credits initialized. - * FW initialized E2E credits. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_HBM_CRED_EN HBM credits initialized. - * FW initialized HBM credits. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_RL_EN Rate limiter initialized. - * FW initialized rate limiter. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_SRAM_SCR_EN SRAM scrambler enabled. - * FW initialized SRAM scrambler. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_DRAM_SCR_EN DRAM scrambler enabled. - * FW initialized DRAM scrambler. - * Initialized in: u-boot - * - * CPU_BOOT_DEV_STS0_FW_HARD_RST_EN FW hard reset procedure is enabled. - * FW has the hard reset procedure - * implemented. This means that FW will - * perform hard reset procedure on - * receiving the halt-machine event. - * Initialized in: preboot, u-boot, linux - * - * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_SP_SRAM_EN SP SRAM is initialized and available - * for use. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled. - * FW initialized Clock Gating. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_HBM_ECC_EN HBM ECC handling Enabled. - * FW handles HBM ECC indications. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd - * is set to the PI counter. 
- * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication - * protocol is enabled. - * Initialized in: preboot - * - * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled. - * This bit if set, means the iATU has been - * configured and is ready for use. - * Initialized in: ppboot - * - * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN NIC MAC channels init is done by FW and - * any access to them is done via the FW. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. - * FW sends to host a bitmap of supported - * PLLs. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from - * previleged entity. FW sets this status - * bit for host. If this bit is set then - * GIC can not be accessed from host. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_EQ_INDEX_EN Event Queue (EQ) index is a running - * index for each new event sent to host. - * This is used as a method in host to - * identify that the waiting event in - * queue is actually a new event which - * was not served before. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to - * prevent IRQs overriding each other. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN - * NIC STAT and XPCS91 access is restricted - * and is done via FW only. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN - * NIC STAT get all is supported. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN - * F/W checks if the device is idle by reading defined set - * of registers. It returns a bitmask of all the engines, - * where a bit is set if the engine is not idle. - * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_MAP_HWMON_EN - * If set, means f/w supports proprietary - * HWMON enum mapping to cpucp enums. 
- * Initialized in: linux - * - * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. - * This is a main indication that the - * running FW populates the device status - * register. Meaning the device status - * bits are not garbage, but actual - * statuses. - * Initialized in: preboot - * - */ -#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << CPU_BOOT_DEV_STS_SECURITY_EN) -#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << CPU_BOOT_DEV_STS_DEBUG_EN) -#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << CPU_BOOT_DEV_STS_WATCHDOG_EN) -#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN) -#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN) -#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << CPU_BOOT_DEV_STS_E2E_CRED_EN) -#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << CPU_BOOT_DEV_STS_HBM_CRED_EN) -#define CPU_BOOT_DEV_STS0_RL_EN (1 << CPU_BOOT_DEV_STS_RL_EN) -#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN) -#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN) -#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN) -#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << CPU_BOOT_DEV_STS_PLL_INFO_EN) -#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << CPU_BOOT_DEV_STS_SP_SRAM_EN) -#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << CPU_BOOT_DEV_STS_CLK_GATE_EN) -#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << CPU_BOOT_DEV_STS_HBM_ECC_EN) -#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN) -#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN) -#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN) -#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN) -#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << CPU_BOOT_DEV_STS_DYN_PLL_EN) -#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN) -#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN) -#define 
CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN) -#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN) -#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN) -#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN) -#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN) -#define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) -#define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) - -enum cpu_boot_status { - CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ - CPU_BOOT_STATUS_IN_WFE = 1, - CPU_BOOT_STATUS_DRAM_RDY = 2, - CPU_BOOT_STATUS_SRAM_AVAIL = 3, - CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */ - CPU_BOOT_STATUS_IN_PREBOOT = 5, - CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */ - CPU_BOOT_STATUS_IN_UBOOT = 7, - CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */ - CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */ - /* U-Boot console prompt activated, commands are not processed */ - CPU_BOOT_STATUS_UBOOT_NOT_READY = 10, - /* Finished NICs init, reported after DRAM and NICs */ - CPU_BOOT_STATUS_NIC_FW_RDY = 11, - CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */ - CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */ - CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */ - /* Last boot loader progress status, ready to receive commands */ - CPU_BOOT_STATUS_READY_TO_BOOT = 15, - /* Internal Boot finished, ready for boot-fit */ - CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, - /* Internal Security has been initialized, device can be accessed */ - CPU_BOOT_STATUS_SECURITY_READY = 17, -}; - -enum kmd_msg { - KMD_MSG_NA = 0, - KMD_MSG_GOTO_WFE, - KMD_MSG_FIT_RDY, - KMD_MSG_SKIP_BMC, - RESERVED, - KMD_MSG_RST_DEV, - KMD_MSG_LAST -}; - -enum cpu_msg_status { - CPU_MSG_CLR = 0, - CPU_MSG_OK, - CPU_MSG_ERR, 
-}; - -/* communication registers mapping - consider ABI when changing */ -struct cpu_dyn_regs { - __le32 cpu_pq_base_addr_low; - __le32 cpu_pq_base_addr_high; - __le32 cpu_pq_length; - __le32 cpu_pq_init_status; - __le32 cpu_eq_base_addr_low; - __le32 cpu_eq_base_addr_high; - __le32 cpu_eq_length; - __le32 cpu_eq_ci; - __le32 cpu_cq_base_addr_low; - __le32 cpu_cq_base_addr_high; - __le32 cpu_cq_length; - __le32 cpu_pf_pq_pi; - __le32 cpu_boot_dev_sts0; - __le32 cpu_boot_dev_sts1; - __le32 cpu_boot_err0; - __le32 cpu_boot_err1; - __le32 cpu_boot_status; - __le32 fw_upd_sts; - __le32 fw_upd_cmd; - __le32 fw_upd_pending_sts; - __le32 fuse_ver_offset; - __le32 preboot_ver_offset; - __le32 uboot_ver_offset; - __le32 hw_state; - __le32 kmd_msg_to_cpu; - __le32 cpu_cmd_status_to_host; - __le32 gic_host_pi_upd_irq; - __le32 gic_tpc_qm_irq_ctrl; - __le32 gic_mme_qm_irq_ctrl; - __le32 gic_dma_qm_irq_ctrl; - __le32 gic_nic_qm_irq_ctrl; - __le32 gic_dma_core_irq_ctrl; - __le32 gic_host_halt_irq; - __le32 gic_host_ints_irq; - __le32 gic_host_soft_rst_irq; - __le32 gic_rot_qm_irq_ctrl; - __le32 cpu_rst_status; - __le32 eng_arc_irq_ctrl; - __le32 reserved1[20]; /* reserve for future use */ -}; - -/* TODO: remove the desc magic after the code is updated to use message */ -/* HCDM - Habana Communications Descriptor Magic */ -#define HL_COMMS_DESC_MAGIC 0x4843444D -#define HL_COMMS_DESC_VER 3 - -/* HCMv - Habana Communications Message + header version */ -#define HL_COMMS_MSG_MAGIC_VALUE 0x48434D00 -#define HL_COMMS_MSG_MAGIC_MASK 0xFFFFFF00 -#define HL_COMMS_MSG_MAGIC_VER_MASK 0xFF - -#define HL_COMMS_MSG_MAGIC_VER(ver) (HL_COMMS_MSG_MAGIC_VALUE | \ - ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) -#define HL_COMMS_MSG_MAGIC_V0 HL_COMMS_DESC_MAGIC -#define HL_COMMS_MSG_MAGIC_V1 HL_COMMS_MSG_MAGIC_VER(1) -#define HL_COMMS_MSG_MAGIC_V2 HL_COMMS_MSG_MAGIC_VER(2) -#define HL_COMMS_MSG_MAGIC_V3 HL_COMMS_MSG_MAGIC_VER(3) - -#define HL_COMMS_MSG_MAGIC HL_COMMS_MSG_MAGIC_V3 - -#define 
HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC(magic) \ - (((magic) & HL_COMMS_MSG_MAGIC_MASK) == \ - HL_COMMS_MSG_MAGIC_VALUE) - -#define HL_COMMS_MSG_MAGIC_VALIDATE_VERSION(magic, ver) \ - (((magic) & HL_COMMS_MSG_MAGIC_VER_MASK) >= \ - ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) - -#define HL_COMMS_MSG_MAGIC_VALIDATE(magic, ver) \ - (HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC((magic)) && \ - HL_COMMS_MSG_MAGIC_VALIDATE_VERSION((magic), (ver))) - -enum comms_msg_type { - HL_COMMS_DESC_TYPE = 0, - HL_COMMS_RESET_CAUSE_TYPE = 1, - HL_COMMS_FW_CFG_SKIP_TYPE = 2, - HL_COMMS_BINNING_CONF_TYPE = 3, -}; - -/* - * Binning information shared between LKD and FW - * @tpc_mask_l - TPC binning information lower 64 bit - * @dec_mask - Decoder binning information - * @dram_mask - DRAM binning information - * @edma_mask - EDMA binning information - * @mme_mask_l - MME binning information lower 32 - * @mme_mask_h - MME binning information upper 32 - * @rot_mask - Rotator binning information - * @xbar_mask - xBAR binning information - * @reserved - reserved field for future binning info w/o ABI change - * @tpc_mask_h - TPC binning information upper 64 bit - * @nic_mask - NIC binning information - */ -struct lkd_fw_binning_info { - __le64 tpc_mask_l; - __le32 dec_mask; - __le32 dram_mask; - __le32 edma_mask; - __le32 mme_mask_l; - __le32 mme_mask_h; - __le32 rot_mask; - __le32 xbar_mask; - __le32 reserved0; - __le64 tpc_mask_h; - __le64 nic_mask; - __le32 reserved1[8]; -}; - -/* TODO: remove this struct after the code is updated to use message */ -/* this is the comms descriptor header - meta data */ -struct comms_desc_header { - __le32 magic; /* magic for validation */ - __le32 crc32; /* CRC32 of the descriptor w/o header */ - __le16 size; /* size of the descriptor w/o header */ - __u8 version; /* descriptor version */ - __u8 reserved[5]; /* pad to 64 bit */ -}; - -/* this is the comms message header - meta data */ -struct comms_msg_header { - __le32 magic; /* magic for validation */ - __le32 crc32; /* 
CRC32 of the message w/o header */ - __le16 size; /* size of the message w/o header */ - __u8 version; /* message payload version */ - __u8 type; /* message type */ - __u8 reserved[4]; /* pad to 64 bit */ -}; - -enum lkd_fw_ascii_msg_lvls { - LKD_FW_ASCII_MSG_ERR = 0, - LKD_FW_ASCII_MSG_WRN = 1, - LKD_FW_ASCII_MSG_INF = 2, - LKD_FW_ASCII_MSG_DBG = 3, -}; - -#define LKD_FW_ASCII_MSG_MAX_LEN 128 -#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ - -struct lkd_fw_ascii_msg { - __u8 valid; - __u8 msg_lvl; - __u8 reserved[6]; - char msg[LKD_FW_ASCII_MSG_MAX_LEN]; -}; - -/* this is the main FW descriptor - consider ABI when changing */ -struct lkd_fw_comms_desc { - struct comms_desc_header header; - struct cpu_dyn_regs cpu_dyn_regs; - char fuse_ver[VERSION_MAX_LEN]; - char cur_fw_ver[VERSION_MAX_LEN]; - /* can be used for 1 more version w/o ABI change */ - char reserved0[VERSION_MAX_LEN]; - __le64 img_addr; /* address for next FW component load */ - struct lkd_fw_binning_info binning_info; - struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; - __le32 rsvd_mem_size_mb; /* reserved memory size [MB] for FW/SVE */ - char reserved1[4]; -}; - -enum comms_reset_cause { - HL_RESET_CAUSE_UNKNOWN = 0, - HL_RESET_CAUSE_HEARTBEAT = 1, - HL_RESET_CAUSE_TDR = 2, -}; - -/* TODO: remove define after struct name is aligned on all projects */ -#define lkd_msg_comms lkd_fw_comms_msg - -/* this is the comms message descriptor */ -struct lkd_fw_comms_msg { - struct comms_msg_header header; - /* union for future expantions of new messages */ - union { - struct { - struct cpu_dyn_regs cpu_dyn_regs; - char fuse_ver[VERSION_MAX_LEN]; - char cur_fw_ver[VERSION_MAX_LEN]; - /* can be used for 1 more version w/o ABI change */ - char reserved0[VERSION_MAX_LEN]; - /* address for next FW component load */ - __le64 img_addr; - struct lkd_fw_binning_info binning_info; - struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; - /* reserved memory size [MB] for FW/SVE */ - __le32 
rsvd_mem_size_mb; - char reserved1[4]; - }; - struct { - __u8 reset_cause; - }; - struct { - __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ - }; - struct lkd_fw_binning_info binning_conf; - }; -}; - -/* - * LKD commands: - * - * COMMS_NOOP Used to clear the command register and no actual - * command is send. - * - * COMMS_CLR_STS Clear status command - FW should clear the - * status register. Used for synchronization - * between the commands as part of the race free - * protocol. - * - * COMMS_RST_STATE Reset the current communication state which is - * kept by FW for proper responses. - * Should be used in the beginning of the - * communication cycle to clean any leftovers from - * previous communication attempts. - * - * COMMS_PREP_DESC Prepare descriptor for setting up the - * communication and other dynamic data: - * struct lkd_fw_comms_desc. - * This command has a parameter stating the next FW - * component size, so the FW can actually prepare a - * space for it and in the status response provide - * the descriptor offset. The Offset of the next FW - * data component is a part of the descriptor - * structure. - * - * COMMS_DATA_RDY The FW data has been uploaded and is ready for - * validation. - * - * COMMS_EXEC Execute the next FW component. - * - * COMMS_RST_DEV Reset the device. - * - * COMMS_GOTO_WFE Execute WFE command. Allowed only on non-secure - * devices. - * - * COMMS_SKIP_BMC Perform actions required for BMC-less servers. - * Do not wait for BMC response. - * - * COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory - * space is allocated in a ELBI access only - * address range. 
- * - */ -enum comms_cmd { - COMMS_NOOP = 0, - COMMS_CLR_STS = 1, - COMMS_RST_STATE = 2, - COMMS_PREP_DESC = 3, - COMMS_DATA_RDY = 4, - COMMS_EXEC = 5, - COMMS_RST_DEV = 6, - COMMS_GOTO_WFE = 7, - COMMS_SKIP_BMC = 8, - COMMS_PREP_DESC_ELBI = 10, - COMMS_INVLD_LAST -}; - -#define COMMS_COMMAND_SIZE_SHIFT 0 -#define COMMS_COMMAND_SIZE_MASK 0x1FFFFFF -#define COMMS_COMMAND_CMD_SHIFT 27 -#define COMMS_COMMAND_CMD_MASK 0xF8000000 - -/* - * LKD command to FW register structure - * @size - FW component size - * @cmd - command from enum comms_cmd - */ -struct comms_command { - union { /* bit fields are only for FW use */ - struct { - u32 size :25; /* 32MB max. */ - u32 reserved :2; - enum comms_cmd cmd :5; /* 32 commands */ - }; - __le32 val; - }; -}; - -/* - * FW status - * - * COMMS_STS_NOOP Used to clear the status register and no actual - * status is provided. - * - * COMMS_STS_ACK Command has been received and recognized. - * - * COMMS_STS_OK Command execution has finished successfully. - * - * COMMS_STS_ERR Command execution was unsuccessful and resulted - * in error. - * - * COMMS_STS_VALID_ERR FW validation has failed. - * - * COMMS_STS_TIMEOUT_ERR Command execution has timed out. - */ -enum comms_sts { - COMMS_STS_NOOP = 0, - COMMS_STS_ACK = 1, - COMMS_STS_OK = 2, - COMMS_STS_ERR = 3, - COMMS_STS_VALID_ERR = 4, - COMMS_STS_TIMEOUT_ERR = 5, - COMMS_STS_INVLD_LAST -}; - -/* RAM types for FW components loading - defines the base address */ -enum comms_ram_types { - COMMS_SRAM = 0, - COMMS_DRAM = 1, -}; - -#define COMMS_STATUS_OFFSET_SHIFT 0 -#define COMMS_STATUS_OFFSET_MASK 0x03FFFFFF -#define COMMS_STATUS_OFFSET_ALIGN_SHIFT 2 -#define COMMS_STATUS_RAM_TYPE_SHIFT 26 -#define COMMS_STATUS_RAM_TYPE_MASK 0x0C000000 -#define COMMS_STATUS_STATUS_SHIFT 28 -#define COMMS_STATUS_STATUS_MASK 0xF0000000 - -/* - * FW status to LKD register structure - * @offset - an offset from the base of the ram_type shifted right by - * 2 bits (always aligned to 32 bits). 
- * Allows a maximum addressable offset of 256MB from RAM base. - * Example: for real offset in RAM of 0x800000 (8MB), the value - * in offset field is (0x800000 >> 2) = 0x200000. - * @ram_type - the RAM type that should be used for offset from - * enum comms_ram_types - * @status - status from enum comms_sts - */ -struct comms_status { - union { /* bit fields are only for FW use */ - struct { - u32 offset :26; - enum comms_ram_types ram_type :2; - enum comms_sts status :4; /* 16 statuses */ - }; - __le32 val; - }; -}; - -#define NAME_MAX_LEN 32 /* bytes */ -struct hl_module_data { - __u8 name[NAME_MAX_LEN]; - __u8 version[VERSION_MAX_LEN]; -}; - -/** - * struct hl_component_versions - versions associated with hl component. - * @struct_size: size of all the struct (including dynamic size of modules). - * @modules_offset: offset of the modules field in this struct. - * @component: version of the component itself. - * @fw_os: Firmware OS Version. - * @comp_name: Name of the component. - * @modules_counter: number of set bits in modules_mask. - * @reserved: reserved for future use. - * @modules: versions of the component's modules. Elborated explanation in - * struct cpucp_versions. - */ -struct hl_component_versions { - __le16 struct_size; - __le16 modules_offset; - __u8 component[VERSION_MAX_LEN]; - __u8 fw_os[VERSION_MAX_LEN]; - __u8 comp_name[NAME_MAX_LEN]; - __u8 modules_counter; - __u8 reserved[3]; - struct hl_module_data modules[]; -}; - -/* Max size of fit size */ -#define HL_FW_VERSIONS_FIT_SIZE 4096 - -#endif /* HL_BOOT_IF_H */ diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h new file mode 100644 index 000000000000..4cdedb603ecb --- /dev/null +++ b/include/linux/habanalabs/cpucp_if.h @@ -0,0 +1,1407 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2020-2022 HabanaLabs, Ltd. + * All Rights Reserved. 
+ * + */ + +#ifndef CPUCP_IF_H +#define CPUCP_IF_H + +#include +#include + +#include "hl_boot_if.h" + +#define NUM_HBM_PSEUDO_CH 2 +#define NUM_HBM_CH_PER_DEV 8 +#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT 0 +#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK 0x00000001 +#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_SHIFT 1 +#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK 0x00000002 +#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_SHIFT 2 +#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK 0x00000004 +#define CPUCP_PKT_HBM_ECC_INFO_DERR_SHIFT 3 +#define CPUCP_PKT_HBM_ECC_INFO_DERR_MASK 0x00000008 +#define CPUCP_PKT_HBM_ECC_INFO_SERR_SHIFT 4 +#define CPUCP_PKT_HBM_ECC_INFO_SERR_MASK 0x00000010 +#define CPUCP_PKT_HBM_ECC_INFO_TYPE_SHIFT 5 +#define CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK 0x00000020 +#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT 6 +#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x000007C0 + +#define PLL_MAP_MAX_BITS 128 +#define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) + +/* + * info of the pkt queue pointers in the first async occurrence + */ +struct cpucp_pkt_sync_err { + __le32 pi; + __le32 ci; +}; + +struct hl_eq_hbm_ecc_data { + /* SERR counter */ + __le32 sec_cnt; + /* DERR counter */ + __le32 dec_cnt; + /* Supplemental Information according to the mask bits */ + __le32 hbm_ecc_info; + /* Address in hbm where the ecc happened */ + __le32 first_addr; + /* SERR continuous address counter */ + __le32 sec_cont_cnt; + __le32 pad; +}; + +/* + * EVENT QUEUE + */ + +struct hl_eq_header { + __le32 reserved; + __le32 ctl; +}; + +struct hl_eq_ecc_data { + __le64 ecc_address; + __le64 ecc_syndrom; + __u8 memory_wrapper_idx; + __u8 is_critical; + __le16 block_id; + __u8 pad[4]; +}; + +enum hl_sm_sei_cause { + SM_SEI_SO_OVERFLOW, + SM_SEI_LBW_4B_UNALIGNED, + SM_SEI_AXI_RESPONSE_ERR +}; + +struct hl_eq_sm_sei_data { + __le32 sei_log; + /* enum hl_sm_sei_cause */ + __u8 sei_cause; + __u8 pad[3]; +}; + +enum hl_fw_alive_severity { + FW_ALIVE_SEVERITY_MINOR, + FW_ALIVE_SEVERITY_CRITICAL +}; + +struct hl_eq_fw_alive { + 
__le64 uptime_seconds; + __le32 process_id; + __le32 thread_id; + /* enum hl_fw_alive_severity */ + __u8 severity; + __u8 pad[7]; +}; + +struct hl_eq_intr_cause { + __le64 intr_cause_data; +}; + +struct hl_eq_pcie_drain_ind_data { + struct hl_eq_intr_cause intr_cause; + __le64 drain_wr_addr_lbw; + __le64 drain_rd_addr_lbw; + __le64 drain_wr_addr_hbw; + __le64 drain_rd_addr_hbw; +}; + +struct hl_eq_razwi_lbw_info_regs { + __le32 rr_aw_razwi_reg; + __le32 rr_aw_razwi_id_reg; + __le32 rr_ar_razwi_reg; + __le32 rr_ar_razwi_id_reg; +}; + +struct hl_eq_razwi_hbw_info_regs { + __le32 rr_aw_razwi_hi_reg; + __le32 rr_aw_razwi_lo_reg; + __le32 rr_aw_razwi_id_reg; + __le32 rr_ar_razwi_hi_reg; + __le32 rr_ar_razwi_lo_reg; + __le32 rr_ar_razwi_id_reg; +}; + +/* razwi_happened masks */ +#define RAZWI_HAPPENED_HBW 0x1 +#define RAZWI_HAPPENED_LBW 0x2 +#define RAZWI_HAPPENED_AW 0x4 +#define RAZWI_HAPPENED_AR 0x8 + +struct hl_eq_razwi_info { + __le32 razwi_happened_mask; + union { + struct hl_eq_razwi_lbw_info_regs lbw; + struct hl_eq_razwi_hbw_info_regs hbw; + }; + __le32 pad; +}; + +struct hl_eq_razwi_with_intr_cause { + struct hl_eq_razwi_info razwi_info; + struct hl_eq_intr_cause intr_cause; +}; + +#define HBM_CA_ERR_CMD_LIFO_LEN 8 +#define HBM_RD_ERR_DATA_LIFO_LEN 8 +#define HBM_WR_PAR_CMD_LIFO_LEN 11 + +enum hl_hbm_sei_cause { + /* Command/address parity error event is split into 2 events due to + * size limitation: ODD suffix for odd HBM CK_t cycles and EVEN suffix + * for even HBM CK_t cycles + */ + HBM_SEI_CMD_PARITY_EVEN, + HBM_SEI_CMD_PARITY_ODD, + /* Read errors can be reflected as a combination of SERR/DERR/parity + * errors. Therefore, we define one event for all read error types. + * LKD will perform further proccessing. 
+ */ + HBM_SEI_READ_ERR, + HBM_SEI_WRITE_DATA_PARITY_ERR, + HBM_SEI_CATTRIP, + HBM_SEI_MEM_BIST_FAIL, + HBM_SEI_DFI, + HBM_SEI_INV_TEMP_READ_OUT, + HBM_SEI_BIST_FAIL, +}; + +/* Masks for parsing hl_hbm_sei_headr fields */ +#define HBM_ECC_SERR_CNTR_MASK 0xFF +#define HBM_ECC_DERR_CNTR_MASK 0xFF00 +#define HBM_RD_PARITY_CNTR_MASK 0xFF0000 + +/* HBM index and MC index are known by the event_id */ +struct hl_hbm_sei_header { + union { + /* relevant only in case of HBM read error */ + struct { + __u8 ecc_serr_cnt; + __u8 ecc_derr_cnt; + __u8 read_par_cnt; + __u8 reserved; + }; + /* All other cases */ + __le32 cnt; + }; + __u8 sei_cause; /* enum hl_hbm_sei_cause */ + __u8 mc_channel; /* range: 0-3 */ + __u8 mc_pseudo_channel; /* range: 0-7 */ + __u8 is_critical; +}; + +#define HBM_RD_ADDR_SID_SHIFT 0 +#define HBM_RD_ADDR_SID_MASK 0x1 +#define HBM_RD_ADDR_BG_SHIFT 1 +#define HBM_RD_ADDR_BG_MASK 0x6 +#define HBM_RD_ADDR_BA_SHIFT 3 +#define HBM_RD_ADDR_BA_MASK 0x18 +#define HBM_RD_ADDR_COL_SHIFT 5 +#define HBM_RD_ADDR_COL_MASK 0x7E0 +#define HBM_RD_ADDR_ROW_SHIFT 11 +#define HBM_RD_ADDR_ROW_MASK 0x3FFF800 + +struct hbm_rd_addr { + union { + /* bit fields are only for FW use */ + struct { + u32 dbg_rd_err_addr_sid:1; + u32 dbg_rd_err_addr_bg:2; + u32 dbg_rd_err_addr_ba:2; + u32 dbg_rd_err_addr_col:6; + u32 dbg_rd_err_addr_row:15; + u32 reserved:6; + }; + __le32 rd_addr_val; + }; +}; + +#define HBM_RD_ERR_BEAT_SHIFT 2 +/* dbg_rd_err_misc fields: */ +/* Read parity is calculated per DW on every beat */ +#define HBM_RD_ERR_PAR_ERR_BEAT0_SHIFT 0 +#define HBM_RD_ERR_PAR_ERR_BEAT0_MASK 0x3 +#define HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT 8 +#define HBM_RD_ERR_PAR_DATA_BEAT0_MASK 0x300 +/* ECC is calculated per PC on every beat */ +#define HBM_RD_ERR_SERR_BEAT0_SHIFT 16 +#define HBM_RD_ERR_SERR_BEAT0_MASK 0x10000 +#define HBM_RD_ERR_DERR_BEAT0_SHIFT 24 +#define HBM_RD_ERR_DERR_BEAT0_MASK 0x100000 + +struct hl_eq_hbm_sei_read_err_intr_info { + /* DFI_RD_ERR_REP_ADDR */ + struct hbm_rd_addr 
dbg_rd_err_addr; + /* DFI_RD_ERR_REP_ERR */ + union { + struct { + /* bit fields are only for FW use */ + u32 dbg_rd_err_par:8; + u32 dbg_rd_err_par_data:8; + u32 dbg_rd_err_serr:4; + u32 dbg_rd_err_derr:4; + u32 reserved:8; + }; + __le32 dbg_rd_err_misc; + }; + /* DFI_RD_ERR_REP_DM */ + __le32 dbg_rd_err_dm; + /* DFI_RD_ERR_REP_SYNDROME */ + __le32 dbg_rd_err_syndrome; + /* DFI_RD_ERR_REP_DATA */ + __le32 dbg_rd_err_data[HBM_RD_ERR_DATA_LIFO_LEN]; +}; + +struct hl_eq_hbm_sei_ca_par_intr_info { + /* 14 LSBs */ + __le16 dbg_row[HBM_CA_ERR_CMD_LIFO_LEN]; + /* 18 LSBs */ + __le32 dbg_col[HBM_CA_ERR_CMD_LIFO_LEN]; +}; + +#define WR_PAR_LAST_CMD_COL_SHIFT 0 +#define WR_PAR_LAST_CMD_COL_MASK 0x3F +#define WR_PAR_LAST_CMD_BG_SHIFT 6 +#define WR_PAR_LAST_CMD_BG_MASK 0xC0 +#define WR_PAR_LAST_CMD_BA_SHIFT 8 +#define WR_PAR_LAST_CMD_BA_MASK 0x300 +#define WR_PAR_LAST_CMD_SID_SHIFT 10 +#define WR_PAR_LAST_CMD_SID_MASK 0x400 + +/* Row address isn't latched */ +struct hbm_sei_wr_cmd_address { + /* DFI_DERR_LAST_CMD */ + union { + struct { + /* bit fields are only for FW use */ + u32 col:6; + u32 bg:2; + u32 ba:2; + u32 sid:1; + u32 reserved:21; + }; + __le32 dbg_wr_cmd_addr; + }; +}; + +struct hl_eq_hbm_sei_wr_par_intr_info { + /* entry 0: WR command address from the 1st cycle prior to the error + * entry 1: WR command address from the 2nd cycle prior to the error + * and so on... + */ + struct hbm_sei_wr_cmd_address dbg_last_wr_cmds[HBM_WR_PAR_CMD_LIFO_LEN]; + /* derr[0:1] - 1st HBM cycle DERR output + * derr[2:3] - 2nd HBM cycle DERR output + */ + __u8 dbg_derr; + /* extend to reach 8B */ + __u8 pad[3]; +}; + +/* + * this struct represents the following sei causes: + * command parity, ECC double error, ECC single error, dfi error, cattrip, + * temperature read-out, read parity error and write parity error. + * some only use the header while some have extra data. 
+ */ +struct hl_eq_hbm_sei_data { + struct hl_hbm_sei_header hdr; + union { + struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_even_info; + struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_odd_info; + struct hl_eq_hbm_sei_read_err_intr_info read_err_info; + struct hl_eq_hbm_sei_wr_par_intr_info wr_parity_info; + }; +}; + +/* Engine/farm arc interrupt type */ +enum hl_engine_arc_interrupt_type { + /* Qman/farm ARC DCCM QUEUE FULL interrupt type */ + ENGINE_ARC_DCCM_QUEUE_FULL_IRQ = 1 +}; + +/* Data structure specifies details of payload of DCCM QUEUE FULL interrupt */ +struct hl_engine_arc_dccm_queue_full_irq { + /* Queue index value which caused DCCM QUEUE FULL */ + __le32 queue_index; + __le32 pad; +}; + +/* Data structure specifies details of QM/FARM ARC interrupt */ +struct hl_eq_engine_arc_intr_data { + /* ARC engine id e.g. DCORE0_TPC0_QM_ARC, DCORE0_TCP1_QM_ARC */ + __le32 engine_id; + __le32 intr_type; /* enum hl_engine_arc_interrupt_type */ + /* More info related to the interrupt e.g. queue index + * incase of DCCM_QUEUE_FULL interrupt. 
+ */ + __le64 payload; + __le64 pad[5]; +}; + +#define ADDR_DEC_ADDRESS_COUNT_MAX 4 + +/* Data structure specifies details of ADDR_DEC interrupt */ +struct hl_eq_addr_dec_intr_data { + struct hl_eq_intr_cause intr_cause; + __le64 addr[ADDR_DEC_ADDRESS_COUNT_MAX]; + __u8 addr_cnt; + __u8 pad[7]; +}; + +struct hl_eq_entry { + struct hl_eq_header hdr; + union { + __le64 data_placeholder; + struct hl_eq_ecc_data ecc_data; + struct hl_eq_hbm_ecc_data hbm_ecc_data; /* Obsolete */ + struct hl_eq_sm_sei_data sm_sei_data; + struct cpucp_pkt_sync_err pkt_sync_err; + struct hl_eq_fw_alive fw_alive; + struct hl_eq_intr_cause intr_cause; + struct hl_eq_pcie_drain_ind_data pcie_drain_ind_data; + struct hl_eq_razwi_info razwi_info; + struct hl_eq_razwi_with_intr_cause razwi_with_intr_cause; + struct hl_eq_hbm_sei_data sei_data; /* Gaudi2 HBM */ + struct hl_eq_engine_arc_intr_data arc_data; + struct hl_eq_addr_dec_intr_data addr_dec; + __le64 data[7]; + }; +}; + +#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) + +#define EQ_CTL_READY_SHIFT 31 +#define EQ_CTL_READY_MASK 0x80000000 + +#define EQ_CTL_EVENT_TYPE_SHIFT 16 +#define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000 + +#define EQ_CTL_INDEX_SHIFT 0 +#define EQ_CTL_INDEX_MASK 0x0000FFFF + +enum pq_init_status { + PQ_INIT_STATUS_NA = 0, + PQ_INIT_STATUS_READY_FOR_CP, + PQ_INIT_STATUS_READY_FOR_HOST, + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI, + PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR, + PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR +}; + +/* + * CpuCP Primary Queue Packets + * + * During normal operation, the host's kernel driver needs to send various + * messages to CpuCP, usually either to SET some value into a H/W periphery or + * to GET the current value of some H/W periphery. For example, SET the + * frequency of MME/TPC and GET the value of the thermal sensor. + * + * These messages can be initiated either by the User application or by the + * host's driver itself, e.g. power management code. 
In either case, the + * communication from the host's driver to CpuCP will *always* be in + * synchronous mode, meaning that the host will send a single message and poll + * until the message was acknowledged and the results are ready (if results are + * needed). + * + * This means that only a single message can be sent at a time and the host's + * driver must wait for its result before sending the next message. Having said + * that, because these are control messages which are sent in a relatively low + * frequency, this limitation seems acceptable. It's important to note that + * in case of multiple devices, messages to different devices *can* be sent + * at the same time. + * + * The message, inputs/outputs (if relevant) and fence object will be located + * on the device DDR at an address that will be determined by the host's driver. + * During device initialization phase, the host will pass to CpuCP that address. + * Most of the message types will contain inputs/outputs inside the message + * itself. The common part of each message will contain the opcode of the + * message (its type) and a field representing a fence object. + * + * When the host's driver wishes to send a message to CPU CP, it will write the + * message contents to the device DDR, clear the fence object and then write to + * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU. + * + * Upon receiving the interrupt (#121), CpuCP will read the message from the + * DDR. In case the message is a SET operation, CpuCP will first perform the + * operation and then write to the fence object on the device DDR. In case the + * message is a GET operation, CpuCP will first fill the results section on the + * device DDR and then write to the fence object. If an error occurred, CpuCP + * will fill the rc field with the right error code. + * + * In the meantime, the host's driver will poll on the fence object. 
Once the + * host sees that the fence object is signaled, it will read the results from + * the device DDR (if relevant) and resume the code execution in the host's + * driver. + * + * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 + * so the value being put by the host's driver matches the value read by CpuCP + * + * Non-QMAN packets should be limited to values 1 through (2^8 - 1) + * + * Detailed description: + * + * CPUCP_PACKET_DISABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU must NOT issue PCI + * transactions (read/write) towards the Host CPU. This also includes + * sending MSI-X interrupts. + * This packet is usually sent before the device is moved to D3Hot state. + * + * CPUCP_PACKET_ENABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU is allowed to issue PCI + * transactions towards the Host CPU, including sending MSI-X interrupts. + * This packet is usually sent after the device is moved to D0 state. + * + * CPUCP_PACKET_TEMPERATURE_GET - + * Fetch the current temperature / Max / Max Hyst / Critical / + * Critical Hyst of a specified thermal sensor. The packet's + * arguments specify the desired sensor and the field to get. + * + * CPUCP_PACKET_VOLTAGE_GET - + * Fetch the voltage / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_CURRENT_GET - + * Fetch the current / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_FAN_SPEED_GET - + * Fetch the speed / Max / Min of a specified fan. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_GET - + * Fetch the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_SET - + * Set the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor, type and value.
+ * + * CPUCP_PACKET_FREQUENCY_SET - + * Set the frequency of a specified PLL. The packet's arguments specify + * the PLL and the desired frequency. The actual frequency in the device + * might differ from the requested frequency. + * + * CPUCP_PACKET_FREQUENCY_GET - + * Fetch the frequency of a specified PLL. The packet's arguments specify + * the PLL. + * + * CPUCP_PACKET_LED_SET - + * Set the state of a specified led. The packet's arguments + * specify the led and the desired state. + * + * CPUCP_PACKET_I2C_WR - + * Write 32-bit value to I2C device. The packet's arguments specify the + * I2C bus, address and value. + * + * CPUCP_PACKET_I2C_RD - + * Read 32-bit value from I2C device. The packet's arguments specify the + * I2C bus and address. + * + * CPUCP_PACKET_INFO_GET - + * Fetch information from the device as specified in the packet's + * structure. The host's driver passes the max size it allows the CpuCP to + * write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ - + * Unmask the given IRQ. The IRQ number is specified in the value field. + * The packet is sent after receiving an interrupt and printing its + * relevant information. + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - + * Unmask the given IRQs. The IRQs numbers are specified in an array right + * after the cpucp_packet structure, where its first element is the array + * length. The packet is sent after a soft reset was done in order to + * handle any interrupts that were sent during the reset process. + * + * CPUCP_PACKET_TEST - + * Test packet for CpuCP connectivity. The CPU will put the fence value + * in the result field. + * + * CPUCP_PACKET_FREQUENCY_CURR_GET - + * Fetch the current frequency of a specified PLL. The packet's arguments + * specify the PLL. + * + * CPUCP_PACKET_MAX_POWER_GET - + * Fetch the maximal power of the device. 
+ * + * CPUCP_PACKET_MAX_POWER_SET - + * Set the maximal power of the device. The packet's arguments specify + * the power. + * + * CPUCP_PACKET_EEPROM_DATA_GET - + * Get EEPROM data from the CpuCP kernel. The buffer is specified in the + * addr field. The CPU will put the returned data size in the result + * field. In addition, the host's driver passes the max size it allows the + * CpuCP to write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_NIC_INFO_GET - + * Fetch information from the device regarding the NIC. The host's driver + * passes the max size it allows the CpuCP to write to the structure, to + * prevent data corruption in case of mismatched driver/FW versions. + * + * CPUCP_PACKET_TEMPERATURE_SET - + * Set the value of the offset property of a specified thermal sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_VOLTAGE_SET - + * Trigger the reset_history property of a specified voltage sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_CURRENT_SET - + * Trigger the reset_history property of a specified current sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_PCIE_THROUGHPUT_GET - + * Get throughput of PCIe. + * The packet's arguments specify the transaction direction (TX/RX). + * The window measurement is 10[msec], and the return value is in KB/sec. + * + * CPUCP_PACKET_PCIE_REPLAY_CNT_GET - + * Replay count measures number of "replay" events, which is basically + * number of retries done by PCIe. + * + * CPUCP_PACKET_TOTAL_ENERGY_GET - + * Total Energy is measurement of energy from the time FW Linux + * is loaded. It is calculated by multiplying the average power + * by time (passed from armcp start). The units are in millijoules.
+ * + * CPUCP_PACKET_PLL_INFO_GET - + * Fetch frequencies of PLL from the required PLL IP. + * The packet's arguments specify the device PLL type + * Pll type is the PLL from device pll_index enum. + * The result is composed of 4 outputs, each is 16-bit + * frequency in MHz. + * + * CPUCP_PACKET_POWER_GET - + * Fetch the present power consumption of the device (Current * Voltage). + * + * CPUCP_PACKET_NIC_PFC_SET - + * Enable/Disable the NIC PFC feature. The packet's arguments specify the + * NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_FAULT_GET - + * Fetch the current indication for local/remote faults from the NIC MAC. + * The result is 32-bit value of the relevant register. + * + * CPUCP_PACKET_NIC_LPBK_SET - + * Enable/Disable the MAC loopback feature. The packet's arguments specify + * the NIC port, relevant lanes to configure and one bit indication for + * enable/disable. + * + * CPUCP_PACKET_NIC_MAC_INIT - + * Configure the NIC MAC channels. The packet's arguments specify the + * NIC port and the speed. + * + * CPUCP_PACKET_MSI_INFO_SET - + * set the index number for each supported msi type going from + * host to device + * + * CPUCP_PACKET_NIC_XPCS91_REGS_GET - + * Fetch the un/correctable counters values from the NIC MAC. + * + * CPUCP_PACKET_NIC_STAT_REGS_GET - + * Fetch various NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_CLR - + * Clear the various NIC MAC counters in the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET - + * Fetch all NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_IS_IDLE_CHECK - + * Check if the device is IDLE in regard to the DMA/compute engines + * and QMANs. The f/w will return a bitmask where each bit represents + * a different engine or QMAN according to enum cpucp_idle_mask. + * The bit will be 1 if the engine is NOT idle. 
+ * + * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET - + * Fetch all HBM replaced-rows and pending to be replaced rows data. + * + * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS - + * Fetch status of HBM rows pending replacement and need a reboot to + * be replaced. + * + * CPUCP_PACKET_POWER_SET - + * Resets power history of device to 0 + * + * CPUCP_PACKET_ENGINE_CORE_ASID_SET - + * Packet to perform engine core ASID configuration + * + * CPUCP_PACKET_SEC_ATTEST_GET - + * Get the attestation data that is collected during various stages of the + * boot sequence. The attestation data is also hashed with some unique + * number (nonce) provided by the host to prevent replay attacks. + * A public key and certificate are also provided as part of the FW response. + * + * CPUCP_PACKET_MONITOR_DUMP_GET - + * Get monitors registers dump from the CpuCP kernel. + * The CPU will put the registers dump in a buffer allocated by the driver + * whose address is passed via the CpuCp packet. In addition, the host's driver + * passes the max size it allows the CpuCP to write to the structure, to prevent + * data corruption in case of mismatched driver/FW versions. + * Obsolete. + * + * CPUCP_PACKET_GENERIC_PASSTHROUGH - + * Generic opcode for all firmware info that is only passed to host + * through the LKD, without getting parsed there. + * + * CPUCP_PACKET_ACTIVE_STATUS_SET - + * LKD sends FW indication whether device is free or in use, this indication is reported + * also to the BMC. + * + * CPUCP_PACKET_REGISTER_INTERRUPTS - + * Packet to register interrupts indicating LKD is ready to receive events from FW. + * + * CPUCP_PACKET_SOFT_RESET - + * Packet to perform soft-reset. + * + * CPUCP_PACKET_INTS_REGISTER - + * Packet to inform FW that queues have been established and LKD is ready to receive + * EQ events.
+ */ + +enum cpucp_packet_id { + CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ + CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ + CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_GET, /* sysfs */ + CPUCP_PACKET_CURRENT_GET, /* sysfs */ + CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */ + CPUCP_PACKET_PWM_GET, /* sysfs */ + CPUCP_PACKET_PWM_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_GET, /* sysfs */ + CPUCP_PACKET_LED_SET, /* debugfs */ + CPUCP_PACKET_I2C_WR, /* debugfs */ + CPUCP_PACKET_I2C_RD, /* debugfs */ + CPUCP_PACKET_INFO_GET, /* IOCTL */ + CPUCP_PACKET_FLASH_PROGRAM_REMOVED, + CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ + CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ + CPUCP_PACKET_TEST, /* internal */ + CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_SET, /* sysfs */ + CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */ + CPUCP_PACKET_NIC_INFO_GET, /* internal */ + CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_SET, /* sysfs */ + CPUCP_PACKET_CURRENT_SET, /* sysfs */ + CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ + CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ + CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ + CPUCP_PACKET_PLL_INFO_GET, /* internal */ + CPUCP_PACKET_NIC_STATUS, /* internal */ + CPUCP_PACKET_POWER_GET, /* internal */ + CPUCP_PACKET_NIC_PFC_SET, /* internal */ + CPUCP_PACKET_NIC_FAULT_GET, /* internal */ + CPUCP_PACKET_NIC_LPBK_SET, /* internal */ + CPUCP_PACKET_NIC_MAC_CFG, /* internal */ + CPUCP_PACKET_MSI_INFO_SET, /* internal */ + CPUCP_PACKET_NIC_XPCS91_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */ + CPUCP_PACKET_IS_IDLE_CHECK, /* internal */ + CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */ + CPUCP_PACKET_HBM_PENDING_ROWS_STATUS, /* internal */ + 
CPUCP_PACKET_POWER_SET, /* internal */ + CPUCP_PACKET_RESERVED, /* not used */ + CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ + CPUCP_PACKET_RESERVED2, /* not used */ + CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ + CPUCP_PACKET_RESERVED3, /* not used */ + CPUCP_PACKET_RESERVED4, /* not used */ + CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ + CPUCP_PACKET_RESERVED5, /* not used */ + CPUCP_PACKET_RESERVED6, /* not used */ + CPUCP_PACKET_RESERVED7, /* not used */ + CPUCP_PACKET_GENERIC_PASSTHROUGH, /* IOCTL */ + CPUCP_PACKET_RESERVED8, /* not used */ + CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ + CPUCP_PACKET_RESERVED9, /* not used */ + CPUCP_PACKET_RESERVED10, /* not used */ + CPUCP_PACKET_RESERVED11, /* not used */ + CPUCP_PACKET_RESERVED12, /* internal */ + CPUCP_PACKET_RESERVED13, /* internal */ + CPUCP_PACKET_SOFT_RESET, /* internal */ + CPUCP_PACKET_INTS_REGISTER, /* internal */ + CPUCP_PACKET_ID_MAX /* must be last */ +}; + +#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 + +#define CPUCP_PKT_CTL_RC_SHIFT 12 +#define CPUCP_PKT_CTL_RC_MASK 0x0000F000 + +#define CPUCP_PKT_CTL_OPCODE_SHIFT 16 +#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 + +#define CPUCP_PKT_RES_PLL_OUT0_SHIFT 0 +#define CPUCP_PKT_RES_PLL_OUT0_MASK 0x000000000000FFFFull +#define CPUCP_PKT_RES_PLL_OUT1_SHIFT 16 +#define CPUCP_PKT_RES_PLL_OUT1_MASK 0x00000000FFFF0000ull +#define CPUCP_PKT_RES_PLL_OUT2_SHIFT 32 +#define CPUCP_PKT_RES_PLL_OUT2_MASK 0x0000FFFF00000000ull +#define CPUCP_PKT_RES_PLL_OUT3_SHIFT 48 +#define CPUCP_PKT_RES_PLL_OUT3_MASK 0xFFFF000000000000ull + +#define CPUCP_PKT_RES_EEPROM_OUT0_SHIFT 0 +#define CPUCP_PKT_RES_EEPROM_OUT0_MASK 0x000000000000FFFFull +#define CPUCP_PKT_RES_EEPROM_OUT1_SHIFT 16 +#define CPUCP_PKT_RES_EEPROM_OUT1_MASK 0x0000000000FF0000ull + +#define CPUCP_PKT_VAL_PFC_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_PFC_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_PFC_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_PFC_IN2_MASK 0x000000000000001Eull + +#define 
CPUCP_PKT_VAL_LPBK_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_LPBK_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull + +#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull + +/* heartbeat status bits */ +#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0 +#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001 + +struct cpucp_packet { + union { + __le64 value; /* For SET packets */ + __le64 result; /* For GET packets */ + __le64 addr; /* For PQ */ + }; + + __le32 ctl; + + __le32 fence; /* Signal to host that message is completed */ + + union { + struct {/* For temperature/current/voltage/fan/pwm get/set */ + __le16 sensor_index; + __le16 type; + }; + + struct { /* For I2C read/write */ + __u8 i2c_bus; + __u8 i2c_addr; + __u8 i2c_reg; + /* + * In legacy implemetations, i2c_len was not present, + * was unused and just added as pad. + * So if i2c_len is 0, it is treated as legacy + * and r/w 1 Byte, else if i2c_len is specified, + * its treated as new multibyte r/w support. + */ + __u8 i2c_len; + }; + + struct {/* For PLL info fetch */ + __le16 pll_type; + /* TODO pll_reg is kept temporary before removal */ + __le16 pll_reg; + }; + + /* For any general request */ + __le32 index; + + /* For frequency get/set */ + __le32 pll_index; + + /* For led set */ + __le32 led_index; + + /* For get CpuCP info/EEPROM data/NIC info */ + __le32 data_max_size; + + /* + * For any general status bitmask. Shall be used whenever the + * result cannot be used to hold general purpose data. 
+ */ + __le32 status_mask; + + /* random, used once number, for security packets */ + __le32 nonce; + }; + + union { + /* For NIC requests */ + __le32 port_index; + + /* For Generic packet sub index */ + __le32 pkt_subidx; + }; +}; + +struct cpucp_unmask_irq_arr_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 irqs[]; +}; + +struct cpucp_nic_status_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[]; +}; + +struct cpucp_array_data_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[]; +}; + +enum cpucp_led_index { + CPUCP_LED0_INDEX = 0, + CPUCP_LED1_INDEX, + CPUCP_LED2_INDEX, + CPUCP_LED_MAX_INDEX = CPUCP_LED2_INDEX +}; + +/* + * enum cpucp_packet_rc - Error return code + * @cpucp_packet_success -> in case of success. + * @cpucp_packet_invalid -> this is to support first generation platforms. + * @cpucp_packet_fault -> in case of processing error like failing to + * get device binding or semaphore etc. + * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. + * @cpucp_packet_invalid_params -> when checking parameter like length of buffer + * or attribute value etc. + * @cpucp_packet_rc_max -> It indicates size of enum so should be at last. 
+ */ +enum cpucp_packet_rc { + cpucp_packet_success, + cpucp_packet_invalid, + cpucp_packet_fault, + cpucp_packet_invalid_pkt, + cpucp_packet_invalid_params, + cpucp_packet_rc_max +}; + +/* + * cpucp_temp_type should adhere to hwmon_temp_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_temp_type { + cpucp_temp_input, + cpucp_temp_min = 4, + cpucp_temp_min_hyst, + cpucp_temp_max = 6, + cpucp_temp_max_hyst, + cpucp_temp_crit, + cpucp_temp_crit_hyst, + cpucp_temp_offset = 19, + cpucp_temp_lowest = 21, + cpucp_temp_highest = 22, + cpucp_temp_reset_history = 23, + cpucp_temp_warn = 24, + cpucp_temp_max_crit = 25, + cpucp_temp_max_warn = 26, +}; + +enum cpucp_in_attributes { + cpucp_in_input, + cpucp_in_min, + cpucp_in_max, + cpucp_in_lowest = 6, + cpucp_in_highest = 7, + cpucp_in_reset_history, + cpucp_in_intr_alarm_a, + cpucp_in_intr_alarm_b, +}; + +enum cpucp_curr_attributes { + cpucp_curr_input, + cpucp_curr_min, + cpucp_curr_max, + cpucp_curr_lowest = 6, + cpucp_curr_highest = 7, + cpucp_curr_reset_history +}; + +enum cpucp_fan_attributes { + cpucp_fan_input, + cpucp_fan_min = 2, + cpucp_fan_max +}; + +enum cpucp_pwm_attributes { + cpucp_pwm_input, + cpucp_pwm_enable +}; + +enum cpucp_pcie_throughput_attributes { + cpucp_pcie_throughput_tx, + cpucp_pcie_throughput_rx +}; + +/* TODO temporary kept before removal */ +enum cpucp_pll_reg_attributes { + cpucp_pll_nr_reg, + cpucp_pll_nf_reg, + cpucp_pll_od_reg, + cpucp_pll_div_factor_reg, + cpucp_pll_div_sel_reg +}; + +/* TODO temporary kept before removal */ +enum cpucp_pll_type_attributes { + cpucp_pll_cpu, + cpucp_pll_pci, +}; + +/* + * cpucp_power_type aligns with hwmon_power_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_power_type { + CPUCP_POWER_INPUT = 8, + CPUCP_POWER_INPUT_HIGHEST = 9, + CPUCP_POWER_RESET_INPUT_HISTORY = 11 +}; + +/* + * MSI type enumeration table for all ASICs and future SW versions. 
+ * For future ASIC-LKD compatibility, we can only add new enumerations + * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES). + * Changing the order of entries or removing entries is not allowed. + */ +enum cpucp_msi_type { + CPUCP_EVENT_QUEUE_MSI_TYPE, + CPUCP_NIC_PORT1_MSI_TYPE, + CPUCP_NIC_PORT3_MSI_TYPE, + CPUCP_NIC_PORT5_MSI_TYPE, + CPUCP_NIC_PORT7_MSI_TYPE, + CPUCP_NIC_PORT9_MSI_TYPE, + CPUCP_NUM_OF_MSI_TYPES +}; + +/* + * PLL enumeration table used for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations + * at the end of the table. + * Changing the order of entries or removing entries is not allowed. + */ +enum pll_index { + CPU_PLL = 0, + PCI_PLL = 1, + NIC_PLL = 2, + DMA_PLL = 3, + MESH_PLL = 4, + MME_PLL = 5, + TPC_PLL = 6, + IF_PLL = 7, + SRAM_PLL = 8, + NS_PLL = 9, + HBM_PLL = 10, + MSS_PLL = 11, + DDR_PLL = 12, + VID_PLL = 13, + BANK_PLL = 14, + MMU_PLL = 15, + IC_PLL = 16, + MC_PLL = 17, + EMMC_PLL = 18, + D2D_PLL = 19, + CS_PLL = 20, + C2C_PLL = 21, + NCH_PLL = 22, + C2M_PLL = 23, + PLL_MAX +}; + +enum rl_index { + TPC_RL = 0, + MME_RL, + EDMA_RL, +}; + +enum pvt_index { + PVT_SW, + PVT_SE, + PVT_NW, + PVT_NE +}; + +/* Event Queue Packets */ + +struct eq_generic_event { + __le64 data[7]; +}; + +/* + * CpuCP info + */ + +#define CARD_NAME_MAX_LEN 16 +#define CPUCP_MAX_SENSORS 128 +#define CPUCP_MAX_NICS 128 +#define CPUCP_LANES_PER_NIC 4 +#define CPUCP_NIC_QSFP_EEPROM_MAX_LEN 1024 +#define CPUCP_MAX_NIC_LANES (CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC) +#define CPUCP_NIC_MASK_ARR_LEN ((CPUCP_MAX_NICS + 63) / 64) +#define CPUCP_NIC_POLARITY_ARR_LEN ((CPUCP_MAX_NIC_LANES + 63) / 64) +#define CPUCP_HBM_ROW_REPLACE_MAX 32 + +struct cpucp_sensor { + __le32 type; + __le32 flags; +}; + +/** + * enum cpucp_card_types - ASIC card type. + * @cpucp_card_type_pci: PCI card. + * @cpucp_card_type_pmc: PCI Mezzanine Card. 
+ */ +enum cpucp_card_types { + cpucp_card_type_pci, + cpucp_card_type_pmc +}; + +#define CPUCP_SEC_CONF_ENABLED_SHIFT 0 +#define CPUCP_SEC_CONF_ENABLED_MASK 0x00000001 + +#define CPUCP_SEC_CONF_FLASH_WP_SHIFT 1 +#define CPUCP_SEC_CONF_FLASH_WP_MASK 0x00000002 + +#define CPUCP_SEC_CONF_EEPROM_WP_SHIFT 2 +#define CPUCP_SEC_CONF_EEPROM_WP_MASK 0x00000004 + +/** + * struct cpucp_security_info - Security information. + * @config: configuration bit field + * @keys_num: number of stored keys + * @revoked_keys: revoked keys bit field + * @min_svn: minimal security version + */ +struct cpucp_security_info { + __u8 config; + __u8 keys_num; + __u8 revoked_keys; + __u8 min_svn; +}; + +/** + * struct cpucp_info - Info from CpuCP that is necessary to the host's driver + * @sensors: available sensors description. + * @kernel_version: CpuCP linux kernel version. + * @reserved: reserved field. + * @card_type: card configuration type. + * @card_location: in a server, each card has different connections topology + * depending on its location (relevant for PMC card type) + * @cpld_version: CPLD programmed F/W version. + * @infineon_version: Infineon main DC-DC version. + * @fuse_version: silicon production FUSE information. + * @thermal_version: thermald S/W version. + * @cpucp_version: CpuCP S/W version. + * @infineon_second_stage_version: Infineon 2nd stage DC-DC version. + * @dram_size: available DRAM size. + * @card_name: card name that will be displayed in HWMON subsystem on the host + * @tpc_binning_mask: TPC binning mask, 1 bit per TPC instance + * (0 = functional, 1 = binned) + * @decoder_binning_mask: Decoder binning mask, 1 bit per decoder instance + * (0 = functional, 1 = binned), maximum 1 per dcore + * @sram_binning: Categorize SRAM functionality + * (0 = fully functional, 1 = lower-half is not functional, + * 2 = upper-half is not functional) + * @sec_info: security information + * @pll_map: Bit map of supported PLLs for current ASIC version. 
+ * @mme_binning_mask: MME binning mask, + * bits [0:6] <==> dcore0 mme fma + * bits [7:13] <==> dcore1 mme fma + * bits [14:20] <==> dcore0 mme ima + * bits [21:27] <==> dcore1 mme ima + * For each group, if the 6th bit is set then first 5 bits + * represent the col's idx [0-31], otherwise these bits are + * ignored, and col idx 32 is binned. 7th bit is don't care. + * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance + * (0 = functional 1 = binned) + * @memory_repair_flag: eFuse flag indicating memory repair + * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance + * (0 = functional 1 = binned) + * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance + * (0 = functional 1 = binned) + * @interposer_version: Interposer version programmed in eFuse + * @substrate_version: Substrate version programmed in eFuse + * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. + * @fw_os_version: Firmware OS Version + */ +struct cpucp_info { + struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; + __u8 kernel_version[VERSION_MAX_LEN]; + __le32 reserved; + __le32 card_type; + __le32 card_location; + __le32 cpld_version; + __le32 infineon_version; + __u8 fuse_version[VERSION_MAX_LEN]; + __u8 thermal_version[VERSION_MAX_LEN]; + __u8 cpucp_version[VERSION_MAX_LEN]; + __le32 infineon_second_stage_version; + __le64 dram_size; + char card_name[CARD_NAME_MAX_LEN]; + __le64 tpc_binning_mask; + __le64 decoder_binning_mask; + __u8 sram_binning; + __u8 dram_binning_mask; + __u8 memory_repair_flag; + __u8 edma_binning_mask; + __u8 xbar_binning_mask; + __u8 interposer_version; + __u8 substrate_version; + __u8 reserved2; + struct cpucp_security_info sec_info; + __le32 fw_hbm_region_size; + __u8 pll_map[PLL_MAP_LEN]; + __le64 mme_binning_mask; + __u8 fw_os_version[VERSION_MAX_LEN]; +}; + +struct cpucp_mac_addr { + __u8 mac_addr[ETH_ALEN]; +}; + +enum cpucp_serdes_type { + TYPE_1_SERDES_TYPE, + TYPE_2_SERDES_TYPE, + HLS1_SERDES_TYPE, + HLS1H_SERDES_TYPE, 
+ HLS2_SERDES_TYPE, + HLS2_TYPE_1_SERDES_TYPE, + MAX_NUM_SERDES_TYPE, /* number of types */ + UNKNOWN_SERDES_TYPE = 0xFFFF /* serdes_type is u16 */ +}; + +struct cpucp_nic_info { + struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS]; + __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN]; + __le64 pol_tx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; + __le64 pol_rx_mask[CPUCP_NIC_POLARITY_ARR_LEN]; + __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN]; + __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN]; + __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN]; + __le16 serdes_type; /* enum cpucp_serdes_type */ + __le16 tx_swap_map[CPUCP_MAX_NICS]; + __u8 reserved[6]; +}; + +#define PAGE_DISCARD_MAX 64 + +struct page_discard_info { + __u8 num_entries; + __u8 reserved[7]; + __le32 mmu_page_idx[PAGE_DISCARD_MAX]; +}; + +/* + * struct frac_val - fractional value represented by "integer.frac". + * @integer: the integer part of the fractional value; + * @frac: the fractional part of the fractional value. + */ +struct frac_val { + union { + struct { + __le16 integer; + __le16 frac; + }; + __le32 val; + }; +}; + +/* + * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp". + * @integer: the integer part of the SER value; + * @exp: the exponent part of the SER value. + */ +struct ser_val { + __le16 integer; + __le16 exp; +}; + +/* + * struct cpucp_nic_status - describes the status of a NIC port. + * @port: NIC port index. + * @bad_format_cnt: e.g. CRC. + * @responder_out_of_sequence_psn_cnt: e.g. NAK. + * @high_ber_reinit: link reinit due to high BER. + * @correctable_err_cnt: e.g. bit-flip. + * @uncorrectable_err_cnt: e.g. MAC errors. + * @retraining_cnt: re-training counter. + * @up: is port up. + * @pcs_link: has PCS link. + * @phy_ready: is PHY ready. + * @auto_neg: is Autoneg enabled. + * @timeout_retransmission_cnt: timeout retransmission events. + * @high_ber_cnt: high ber events. + * @pre_fec_ser: pre FEC SER value. + * @post_fec_ser: post FEC SER value. 
+ * @bandwidth: measured throughput. + * @lat: measured latency. + */ +struct cpucp_nic_status { + __le32 port; + __le32 bad_format_cnt; + __le32 responder_out_of_sequence_psn_cnt; + __le32 high_ber_reinit; + __le32 correctable_err_cnt; + __le32 uncorrectable_err_cnt; + __le32 retraining_cnt; + __u8 up; + __u8 pcs_link; + __u8 phy_ready; + __u8 auto_neg; + __le32 timeout_retransmission_cnt; + __le32 high_ber_cnt; + struct ser_val pre_fec_ser; + struct ser_val post_fec_ser; + struct frac_val bandwidth; + struct frac_val lat; +}; + +enum cpucp_hbm_row_replace_cause { + REPLACE_CAUSE_DOUBLE_ECC_ERR, + REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR, +}; + +struct cpucp_hbm_row_info { + __u8 hbm_idx; + __u8 pc; + __u8 sid; + __u8 bank_idx; + __le16 row_addr; + __u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */ + __u8 pad; +}; + +struct cpucp_hbm_row_replaced_rows_info { + __le16 num_replaced_rows; + __u8 pad[6]; + struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX]; +}; + +enum cpu_reset_status { + CPU_RST_STATUS_NA = 0, + CPU_RST_STATUS_SOFT_RST_DONE = 1, +}; + +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ + +/* + * struct cpucp_sec_attest_info - attestation report of the boot + * @pcr_data: raw values of the PCR registers + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @nonce: number only used once. random number provided by host. this also + * passed to the quote command as a qualifying data. 
+ * @pcr_quote_len: length of the attestation quote data (bytes) + * @pcr_quote: attestation report data structure + * @quote_sig_len: length of the attestation report signature (bytes) + * @quote_sig: signature structure of the attestation report + * @pub_data_len: length of the public data (bytes) + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the attestation signing key + */ +struct cpucp_sec_attest_info { + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __le16 pad0; + __le32 nonce; + __le16 pcr_quote_len; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig_len; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +/* + * struct cpucp_dev_info_signed - device information signed by a secured device + * @info: device information structure as defined above + * @nonce: number only used once. random number provided by host. this number is + * hashed and signed along with the device information. + * @info_sig_len: length of the attestation signature (bytes) + * @info_sig: signature of the info + nonce data. + * @pub_data_len: length of the public data (bytes) + * @public_data: public key info signed info data + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the signing key + */ +struct cpucp_dev_info_signed { + struct cpucp_info info; /* assumed to be 64bit aligned */ + __le32 nonce; + __le32 pad0; + __u8 info_sig_len; + __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +#define DCORE_MON_REGS_SZ 512 +/* + * struct dcore_monitor_regs_data - DCORE monitor regs data. 
+ * the structure follows sync manager block layout. Obsolete. + * @mon_pay_addrl: array of payload address low bits. + * @mon_pay_addrh: array of payload address high bits. + * @mon_pay_data: array of payload data. + * @mon_arm: array of monitor arm. + * @mon_status: array of monitor status. + */ +struct dcore_monitor_regs_data { + __le32 mon_pay_addrl[DCORE_MON_REGS_SZ]; + __le32 mon_pay_addrh[DCORE_MON_REGS_SZ]; + __le32 mon_pay_data[DCORE_MON_REGS_SZ]; + __le32 mon_arm[DCORE_MON_REGS_SZ]; + __le32 mon_status[DCORE_MON_REGS_SZ]; +}; + +/* contains SM data for each SYNC_MNGR (Obsolete) */ +struct cpucp_monitor_dump { + struct dcore_monitor_regs_data sync_mngr_w_s; + struct dcore_monitor_regs_data sync_mngr_e_s; + struct dcore_monitor_regs_data sync_mngr_w_n; + struct dcore_monitor_regs_data sync_mngr_e_n; +}; + +/* + * The Type of the generic request (and other input arguments) will be fetched from user by reading + * from "pkt_subidx" field in struct cpucp_packet. + * + * HL_PASSTHROUGH_VERSIONS - Fetch all firmware versions. + */ +enum hl_passthrough_type { + HL_PASSTHROUGH_VERSIONS, +}; + +#endif /* CPUCP_IF_H */ diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h new file mode 100644 index 000000000000..7de8a5786a36 --- /dev/null +++ b/include/linux/habanalabs/hl_boot_if.h @@ -0,0 +1,790 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2018-2020 HabanaLabs, Ltd. + * All Rights Reserved. 
+ * + */ + +#ifndef HL_BOOT_IF_H +#define HL_BOOT_IF_H + +#define LKD_HARD_RESET_MAGIC 0xED7BD694 /* deprecated - do not use */ +#define HL_POWER9_HOST_MAGIC 0x1DA30009 + +#define BOOT_FIT_SRAM_OFFSET 0x200000 + +#define VERSION_MAX_LEN 128 + +enum cpu_boot_err { + CPU_BOOT_ERR_DRAM_INIT_FAIL = 0, + CPU_BOOT_ERR_FIT_CORRUPTED = 1, + CPU_BOOT_ERR_TS_INIT_FAIL = 2, + CPU_BOOT_ERR_DRAM_SKIPPED = 3, + CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4, + CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5, + CPU_BOOT_ERR_NIC_FW_FAIL = 6, + CPU_BOOT_ERR_SECURITY_NOT_RDY = 7, + CPU_BOOT_ERR_SECURITY_FAIL = 8, + CPU_BOOT_ERR_EFUSE_FAIL = 9, + CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10, + CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11, + CPU_BOOT_ERR_PLL_FAIL = 12, + CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13, + CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, + CPU_BOOT_ERR_BINNING_FAIL = 19, + CPU_BOOT_ERR_TPM_FAIL = 20, + CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, + CPU_BOOT_ERR_EEPROM_FAIL = 22, + CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL = 23, + CPU_BOOT_ERR_ENABLED = 31, + CPU_BOOT_ERR_SCND_EN = 63, + CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ +}; + +/* + * Mask for fatal failures + * This mask contains all possible fatal failures, and a dynamic code + * will clear the non-relevant ones. + */ +#define CPU_BOOT_ERR_FATAL_MASK \ + ((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) | \ + (1 << CPU_BOOT_ERR_PLL_FAIL) | \ + (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) | \ + (1 << CPU_BOOT_ERR_BINNING_FAIL) | \ + (1 << CPU_BOOT_ERR_DRAM_SKIPPED) | \ + (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) | \ + (1 << CPU_BOOT_ERR_EEPROM_FAIL)) + +/* + * CPU error bits in BOOT_ERROR registers + * + * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed. + * DRAM is not reliable to use. + * + * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the + * image provided by the host has failed. + * + * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed. + * Boot continues as usual, but keep in + * mind this is a warning. 
+ * + * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped. + * Skipping DRAM initialization has been + * requested (e.g. strap, command, etc.) + * and FW skipped the DRAM initialization. + * Host can initialize the DRAM. + * + * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped. + * Meaning the BMC data might not be + * available until reset. + * + * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready. + * BMC has not provided the NIC data yet. + * Once provided this bit will be cleared. + * + * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed. + * The NIC FW loading and initialization + * failed. This means NICs are not usable. + * + * CPU_BOOT_ERR0_SECURITY_NOT_RDY Chip security initialization has been + * started, but is not ready yet - chip + * cannot be accessed. + * + * CPU_BOOT_ERR0_SECURITY_FAIL Security related tasks have failed. + * The tasks are security init (root of + * trust), boot authentication (chain of + * trust), data packets authentication. + * + * CPU_BOOT_ERR0_EFUSE_FAIL Reading from eFuse failed. + * The PCI device ID might be wrong. + * + * CPU_BOOT_ERR0_PRI_IMG_VER_FAIL Verification of primary image failed. + * It means that ppboot checksum + * verification for the preboot primary + * image has failed to match expected + * checksum. Trying to program image again + * might solve this. + * + * CPU_BOOT_ERR0_SEC_IMG_VER_FAIL Verification of secondary image failed. + * It means that ppboot checksum + * verification for the preboot secondary + * image has failed to match expected + * checksum. Trying to program image again + * might solve this. + * + * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one + * of the PLLs remains in REF_CLK + * + * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support + * should be contacted. + * + * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR Critical error was detected during + * the execution of ppboot or preboot. + * For example: stack overflow. 
+ * + * CPU_BOOT_ERR0_BINNING_FAIL Binning settings failed, meaning + * malfunctioning components might still be + * in use. + * + * CPU_BOOT_ERR0_TPM_FAIL TPM verification flow failed. + * + * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for temperature + * sensor. + * + * CPU_BOOT_ERR0_EEPROM_FAIL Failed reading EEPROM data. Defaults + * are used. + * + * CPU_BOOT_ERR0_ENG_ARC_MEM_SCRUB_FAIL Failed scrubbing the Engines/ARCFarm + * memories. Boot disabled until reset. + * + * CPU_BOOT_ERR0_ENABLED Error registers enabled. + * This is a main indication that the + * running FW populates the error + * registers. Meaning the error bits are + * not garbage, but actual error statuses. + */ +#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) +#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << CPU_BOOT_ERR_FIT_CORRUPTED) +#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << CPU_BOOT_ERR_TS_INIT_FAIL) +#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << CPU_BOOT_ERR_DRAM_SKIPPED) +#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED) +#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY) +#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << CPU_BOOT_ERR_NIC_FW_FAIL) +#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << CPU_BOOT_ERR_SECURITY_NOT_RDY) +#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << CPU_BOOT_ERR_SECURITY_FAIL) +#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << CPU_BOOT_ERR_EFUSE_FAIL) +#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_PLL_FAIL (1 << CPU_BOOT_ERR_PLL_FAIL) +#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) +#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) +#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) +#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) +#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << 
CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) +#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) +#define CPU_BOOT_ERR0_ENG_ARC_MEM_SCRUB_FAIL (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) +#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) +#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) + +enum cpu_boot_dev_sts { + CPU_BOOT_DEV_STS_SECURITY_EN = 0, + CPU_BOOT_DEV_STS_DEBUG_EN = 1, + CPU_BOOT_DEV_STS_WATCHDOG_EN = 2, + CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3, + CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4, + CPU_BOOT_DEV_STS_E2E_CRED_EN = 5, + CPU_BOOT_DEV_STS_HBM_CRED_EN = 6, + CPU_BOOT_DEV_STS_RL_EN = 7, + CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8, + CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9, + CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10, + CPU_BOOT_DEV_STS_PLL_INFO_EN = 11, + CPU_BOOT_DEV_STS_SP_SRAM_EN = 12, + CPU_BOOT_DEV_STS_CLK_GATE_EN = 13, + CPU_BOOT_DEV_STS_HBM_ECC_EN = 14, + CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15, + CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16, + CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17, + CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18, + CPU_BOOT_DEV_STS_DYN_PLL_EN = 19, + CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20, + CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21, + CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22, + CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23, + CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24, + CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25, + CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26, + CPU_BOOT_DEV_STS_ENABLED = 31, + CPU_BOOT_DEV_STS_SCND_EN = 63, + CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */ +}; + +/* + * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers + * + * CPU_BOOT_DEV_STS0_SECURITY_EN Security is Enabled. + * This is an indication for security + * enabled in FW, which means that + * all conditions for security are met: + * device is indicated as security enabled, + * registers are protected, and device + * uses keys for image verification. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_DEBUG_EN Debug is enabled. + * Enabled when JTAG or DEBUG is enabled + * in FW. 
+ * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_WATCHDOG_EN Watchdog is enabled. + * Watchdog is enabled in FW. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_DRAM_INIT_EN DRAM initialization is enabled. + * DRAM initialization has been done in FW. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_BMC_WAIT_EN Waiting for BMC data enabled. + * If set, it means that during boot, + * FW waited for BMC data. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_E2E_CRED_EN E2E credits initialized. + * FW initialized E2E credits. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_HBM_CRED_EN HBM credits initialized. + * FW initialized HBM credits. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_RL_EN Rate limiter initialized. + * FW initialized rate limiter. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_SRAM_SCR_EN SRAM scrambler enabled. + * FW initialized SRAM scrambler. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_DRAM_SCR_EN DRAM scrambler enabled. + * FW initialized DRAM scrambler. + * Initialized in: u-boot + * + * CPU_BOOT_DEV_STS0_FW_HARD_RST_EN FW hard reset procedure is enabled. + * FW has the hard reset procedure + * implemented. This means that FW will + * perform hard reset procedure on + * receiving the halt-machine event. + * Initialized in: preboot, u-boot, linux + * + * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_SP_SRAM_EN SP SRAM is initialized and available + * for use. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled. + * FW initialized Clock Gating. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_HBM_ECC_EN HBM ECC handling Enabled. + * FW handles HBM ECC indications. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd + * is set to the PI counter. 
+ * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication + * protocol is enabled. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled. + * This bit, if set, means the iATU has been + * configured and is ready for use. + * Initialized in: ppboot + * + * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN NIC MAC channels init is done by FW and + * any access to them is done via the FW. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. + * FW sends to host a bitmap of supported + * PLLs. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from + * privileged entity. FW sets this status + * bit for host. If this bit is set then + * GIC cannot be accessed from host. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_EQ_INDEX_EN Event Queue (EQ) index is a running + * index for each new event sent to host. + * This is used as a method in host to + * identify that the waiting event in + * queue is actually a new event which + * was not served before. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to + * prevent IRQs overriding each other. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN + * NIC STAT and XPCS91 access is restricted + * and is done via FW only. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN + * NIC STAT get all is supported. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN + * F/W checks if the device is idle by reading defined set + * of registers. It returns a bitmask of all the engines, + * where a bit is set if the engine is not idle. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_MAP_HWMON_EN + * If set, means f/w supports proprietary + * HWMON enum mapping to cpucp enums. 
+ * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. + * This is a main indication that the + * running FW populates the device status + * register. Meaning the device status + * bits are not garbage, but actual + * statuses. + * Initialized in: preboot + * + */ +#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << CPU_BOOT_DEV_STS_SECURITY_EN) +#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << CPU_BOOT_DEV_STS_DEBUG_EN) +#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << CPU_BOOT_DEV_STS_WATCHDOG_EN) +#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN) +#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN) +#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << CPU_BOOT_DEV_STS_E2E_CRED_EN) +#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << CPU_BOOT_DEV_STS_HBM_CRED_EN) +#define CPU_BOOT_DEV_STS0_RL_EN (1 << CPU_BOOT_DEV_STS_RL_EN) +#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN) +#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << CPU_BOOT_DEV_STS_PLL_INFO_EN) +#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << CPU_BOOT_DEV_STS_SP_SRAM_EN) +#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << CPU_BOOT_DEV_STS_CLK_GATE_EN) +#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << CPU_BOOT_DEV_STS_HBM_ECC_EN) +#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN) +#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN) +#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN) +#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << CPU_BOOT_DEV_STS_DYN_PLL_EN) +#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN) +#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN) +#define 
CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN) +#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN) +#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN) +#define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) +#define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) + +enum cpu_boot_status { + CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ + CPU_BOOT_STATUS_IN_WFE = 1, + CPU_BOOT_STATUS_DRAM_RDY = 2, + CPU_BOOT_STATUS_SRAM_AVAIL = 3, + CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */ + CPU_BOOT_STATUS_IN_PREBOOT = 5, + CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */ + CPU_BOOT_STATUS_IN_UBOOT = 7, + CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */ + /* U-Boot console prompt activated, commands are not processed */ + CPU_BOOT_STATUS_UBOOT_NOT_READY = 10, + /* Finished NICs init, reported after DRAM and NICs */ + CPU_BOOT_STATUS_NIC_FW_RDY = 11, + CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */ + CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */ + CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */ + /* Last boot loader progress status, ready to receive commands */ + CPU_BOOT_STATUS_READY_TO_BOOT = 15, + /* Internal Boot finished, ready for boot-fit */ + CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, + /* Internal Security has been initialized, device can be accessed */ + CPU_BOOT_STATUS_SECURITY_READY = 17, +}; + +enum kmd_msg { + KMD_MSG_NA = 0, + KMD_MSG_GOTO_WFE, + KMD_MSG_FIT_RDY, + KMD_MSG_SKIP_BMC, + RESERVED, + KMD_MSG_RST_DEV, + KMD_MSG_LAST +}; + +enum cpu_msg_status { + CPU_MSG_CLR = 0, + CPU_MSG_OK, + CPU_MSG_ERR, 
+}; + +/* communication registers mapping - consider ABI when changing */ +struct cpu_dyn_regs { + __le32 cpu_pq_base_addr_low; + __le32 cpu_pq_base_addr_high; + __le32 cpu_pq_length; + __le32 cpu_pq_init_status; + __le32 cpu_eq_base_addr_low; + __le32 cpu_eq_base_addr_high; + __le32 cpu_eq_length; + __le32 cpu_eq_ci; + __le32 cpu_cq_base_addr_low; + __le32 cpu_cq_base_addr_high; + __le32 cpu_cq_length; + __le32 cpu_pf_pq_pi; + __le32 cpu_boot_dev_sts0; + __le32 cpu_boot_dev_sts1; + __le32 cpu_boot_err0; + __le32 cpu_boot_err1; + __le32 cpu_boot_status; + __le32 fw_upd_sts; + __le32 fw_upd_cmd; + __le32 fw_upd_pending_sts; + __le32 fuse_ver_offset; + __le32 preboot_ver_offset; + __le32 uboot_ver_offset; + __le32 hw_state; + __le32 kmd_msg_to_cpu; + __le32 cpu_cmd_status_to_host; + __le32 gic_host_pi_upd_irq; + __le32 gic_tpc_qm_irq_ctrl; + __le32 gic_mme_qm_irq_ctrl; + __le32 gic_dma_qm_irq_ctrl; + __le32 gic_nic_qm_irq_ctrl; + __le32 gic_dma_core_irq_ctrl; + __le32 gic_host_halt_irq; + __le32 gic_host_ints_irq; + __le32 gic_host_soft_rst_irq; + __le32 gic_rot_qm_irq_ctrl; + __le32 cpu_rst_status; + __le32 eng_arc_irq_ctrl; + __le32 reserved1[20]; /* reserve for future use */ +}; + +/* TODO: remove the desc magic after the code is updated to use message */ +/* HCDM - Habana Communications Descriptor Magic */ +#define HL_COMMS_DESC_MAGIC 0x4843444D +#define HL_COMMS_DESC_VER 3 + +/* HCMv - Habana Communications Message + header version */ +#define HL_COMMS_MSG_MAGIC_VALUE 0x48434D00 +#define HL_COMMS_MSG_MAGIC_MASK 0xFFFFFF00 +#define HL_COMMS_MSG_MAGIC_VER_MASK 0xFF + +#define HL_COMMS_MSG_MAGIC_VER(ver) (HL_COMMS_MSG_MAGIC_VALUE | \ + ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) +#define HL_COMMS_MSG_MAGIC_V0 HL_COMMS_DESC_MAGIC +#define HL_COMMS_MSG_MAGIC_V1 HL_COMMS_MSG_MAGIC_VER(1) +#define HL_COMMS_MSG_MAGIC_V2 HL_COMMS_MSG_MAGIC_VER(2) +#define HL_COMMS_MSG_MAGIC_V3 HL_COMMS_MSG_MAGIC_VER(3) + +#define HL_COMMS_MSG_MAGIC HL_COMMS_MSG_MAGIC_V3 + +#define 
HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC(magic) \ + (((magic) & HL_COMMS_MSG_MAGIC_MASK) == \ + HL_COMMS_MSG_MAGIC_VALUE) + +#define HL_COMMS_MSG_MAGIC_VALIDATE_VERSION(magic, ver) \ + (((magic) & HL_COMMS_MSG_MAGIC_VER_MASK) >= \ + ((ver) & HL_COMMS_MSG_MAGIC_VER_MASK)) + +#define HL_COMMS_MSG_MAGIC_VALIDATE(magic, ver) \ + (HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC((magic)) && \ + HL_COMMS_MSG_MAGIC_VALIDATE_VERSION((magic), (ver))) + +enum comms_msg_type { + HL_COMMS_DESC_TYPE = 0, + HL_COMMS_RESET_CAUSE_TYPE = 1, + HL_COMMS_FW_CFG_SKIP_TYPE = 2, + HL_COMMS_BINNING_CONF_TYPE = 3, +}; + +/* + * Binning information shared between LKD and FW + * @tpc_mask_l - TPC binning information lower 64 bit + * @dec_mask - Decoder binning information + * @dram_mask - DRAM binning information + * @edma_mask - EDMA binning information + * @mme_mask_l - MME binning information lower 32 + * @mme_mask_h - MME binning information upper 32 + * @rot_mask - Rotator binning information + * @xbar_mask - xBAR binning information + * @reserved - reserved field for future binning info w/o ABI change + * @tpc_mask_h - TPC binning information upper 64 bit + * @nic_mask - NIC binning information + */ +struct lkd_fw_binning_info { + __le64 tpc_mask_l; + __le32 dec_mask; + __le32 dram_mask; + __le32 edma_mask; + __le32 mme_mask_l; + __le32 mme_mask_h; + __le32 rot_mask; + __le32 xbar_mask; + __le32 reserved0; + __le64 tpc_mask_h; + __le64 nic_mask; + __le32 reserved1[8]; +}; + +/* TODO: remove this struct after the code is updated to use message */ +/* this is the comms descriptor header - meta data */ +struct comms_desc_header { + __le32 magic; /* magic for validation */ + __le32 crc32; /* CRC32 of the descriptor w/o header */ + __le16 size; /* size of the descriptor w/o header */ + __u8 version; /* descriptor version */ + __u8 reserved[5]; /* pad to 64 bit */ +}; + +/* this is the comms message header - meta data */ +struct comms_msg_header { + __le32 magic; /* magic for validation */ + __le32 crc32; /* 
CRC32 of the message w/o header */ + __le16 size; /* size of the message w/o header */ + __u8 version; /* message payload version */ + __u8 type; /* message type */ + __u8 reserved[4]; /* pad to 64 bit */ +}; + +enum lkd_fw_ascii_msg_lvls { + LKD_FW_ASCII_MSG_ERR = 0, + LKD_FW_ASCII_MSG_WRN = 1, + LKD_FW_ASCII_MSG_INF = 2, + LKD_FW_ASCII_MSG_DBG = 3, +}; + +#define LKD_FW_ASCII_MSG_MAX_LEN 128 +#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ + +struct lkd_fw_ascii_msg { + __u8 valid; + __u8 msg_lvl; + __u8 reserved[6]; + char msg[LKD_FW_ASCII_MSG_MAX_LEN]; +}; + +/* this is the main FW descriptor - consider ABI when changing */ +struct lkd_fw_comms_desc { + struct comms_desc_header header; + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + __le64 img_addr; /* address for next FW component load */ + struct lkd_fw_binning_info binning_info; + struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + __le32 rsvd_mem_size_mb; /* reserved memory size [MB] for FW/SVE */ + char reserved1[4]; +}; + +enum comms_reset_cause { + HL_RESET_CAUSE_UNKNOWN = 0, + HL_RESET_CAUSE_HEARTBEAT = 1, + HL_RESET_CAUSE_TDR = 2, +}; + +/* TODO: remove define after struct name is aligned on all projects */ +#define lkd_msg_comms lkd_fw_comms_msg + +/* this is the comms message descriptor */ +struct lkd_fw_comms_msg { + struct comms_msg_header header; + /* union for future expantions of new messages */ + union { + struct { + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + /* address for next FW component load */ + __le64 img_addr; + struct lkd_fw_binning_info binning_info; + struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + /* reserved memory size [MB] for FW/SVE */ + __le32 
rsvd_mem_size_mb; + char reserved1[4]; + }; + struct { + __u8 reset_cause; + }; + struct { + __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ + }; + struct lkd_fw_binning_info binning_conf; + }; +}; + +/* + * LKD commands: + * + * COMMS_NOOP Used to clear the command register and no actual + * command is send. + * + * COMMS_CLR_STS Clear status command - FW should clear the + * status register. Used for synchronization + * between the commands as part of the race free + * protocol. + * + * COMMS_RST_STATE Reset the current communication state which is + * kept by FW for proper responses. + * Should be used in the beginning of the + * communication cycle to clean any leftovers from + * previous communication attempts. + * + * COMMS_PREP_DESC Prepare descriptor for setting up the + * communication and other dynamic data: + * struct lkd_fw_comms_desc. + * This command has a parameter stating the next FW + * component size, so the FW can actually prepare a + * space for it and in the status response provide + * the descriptor offset. The Offset of the next FW + * data component is a part of the descriptor + * structure. + * + * COMMS_DATA_RDY The FW data has been uploaded and is ready for + * validation. + * + * COMMS_EXEC Execute the next FW component. + * + * COMMS_RST_DEV Reset the device. + * + * COMMS_GOTO_WFE Execute WFE command. Allowed only on non-secure + * devices. + * + * COMMS_SKIP_BMC Perform actions required for BMC-less servers. + * Do not wait for BMC response. + * + * COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory + * space is allocated in a ELBI access only + * address range. 
+ * + */ +enum comms_cmd { + COMMS_NOOP = 0, + COMMS_CLR_STS = 1, + COMMS_RST_STATE = 2, + COMMS_PREP_DESC = 3, + COMMS_DATA_RDY = 4, + COMMS_EXEC = 5, + COMMS_RST_DEV = 6, + COMMS_GOTO_WFE = 7, + COMMS_SKIP_BMC = 8, + COMMS_PREP_DESC_ELBI = 10, + COMMS_INVLD_LAST +}; + +#define COMMS_COMMAND_SIZE_SHIFT 0 +#define COMMS_COMMAND_SIZE_MASK 0x1FFFFFF +#define COMMS_COMMAND_CMD_SHIFT 27 +#define COMMS_COMMAND_CMD_MASK 0xF8000000 + +/* + * LKD command to FW register structure + * @size - FW component size + * @cmd - command from enum comms_cmd + */ +struct comms_command { + union { /* bit fields are only for FW use */ + struct { + u32 size :25; /* 32MB max. */ + u32 reserved :2; + enum comms_cmd cmd :5; /* 32 commands */ + }; + __le32 val; + }; +}; + +/* + * FW status + * + * COMMS_STS_NOOP Used to clear the status register and no actual + * status is provided. + * + * COMMS_STS_ACK Command has been received and recognized. + * + * COMMS_STS_OK Command execution has finished successfully. + * + * COMMS_STS_ERR Command execution was unsuccessful and resulted + * in error. + * + * COMMS_STS_VALID_ERR FW validation has failed. + * + * COMMS_STS_TIMEOUT_ERR Command execution has timed out. + */ +enum comms_sts { + COMMS_STS_NOOP = 0, + COMMS_STS_ACK = 1, + COMMS_STS_OK = 2, + COMMS_STS_ERR = 3, + COMMS_STS_VALID_ERR = 4, + COMMS_STS_TIMEOUT_ERR = 5, + COMMS_STS_INVLD_LAST +}; + +/* RAM types for FW components loading - defines the base address */ +enum comms_ram_types { + COMMS_SRAM = 0, + COMMS_DRAM = 1, +}; + +#define COMMS_STATUS_OFFSET_SHIFT 0 +#define COMMS_STATUS_OFFSET_MASK 0x03FFFFFF +#define COMMS_STATUS_OFFSET_ALIGN_SHIFT 2 +#define COMMS_STATUS_RAM_TYPE_SHIFT 26 +#define COMMS_STATUS_RAM_TYPE_MASK 0x0C000000 +#define COMMS_STATUS_STATUS_SHIFT 28 +#define COMMS_STATUS_STATUS_MASK 0xF0000000 + +/* + * FW status to LKD register structure + * @offset - an offset from the base of the ram_type shifted right by + * 2 bits (always aligned to 32 bits). 
+ * Allows a maximum addressable offset of 256MB from RAM base. + * Example: for real offset in RAM of 0x800000 (8MB), the value + * in offset field is (0x800000 >> 2) = 0x200000. + * @ram_type - the RAM type that should be used for offset from + * enum comms_ram_types + * @status - status from enum comms_sts + */ +struct comms_status { + union { /* bit fields are only for FW use */ + struct { + u32 offset :26; + enum comms_ram_types ram_type :2; + enum comms_sts status :4; /* 16 statuses */ + }; + __le32 val; + }; +}; + +#define NAME_MAX_LEN 32 /* bytes */ +struct hl_module_data { + __u8 name[NAME_MAX_LEN]; + __u8 version[VERSION_MAX_LEN]; +}; + +/** + * struct hl_component_versions - versions associated with hl component. + * @struct_size: size of all the struct (including dynamic size of modules). + * @modules_offset: offset of the modules field in this struct. + * @component: version of the component itself. + * @fw_os: Firmware OS Version. + * @comp_name: Name of the component. + * @modules_counter: number of set bits in modules_mask. + * @reserved: reserved for future use. + * @modules: versions of the component's modules. Elborated explanation in + * struct cpucp_versions. + */ +struct hl_component_versions { + __le16 struct_size; + __le16 modules_offset; + __u8 component[VERSION_MAX_LEN]; + __u8 fw_os[VERSION_MAX_LEN]; + __u8 comp_name[NAME_MAX_LEN]; + __u8 modules_counter; + __u8 reserved[3]; + struct hl_module_data modules[]; +}; + +/* Max size of fit size */ +#define HL_FW_VERSIONS_FIT_SIZE 4096 + +#endif /* HL_BOOT_IF_H */ -- cgit v1.2.3 From 7c4130e6ddd709be2033a6635c91d445cb2baea5 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Tue, 8 Aug 2023 12:56:47 +0300 Subject: accel/habanalabs/gaudi2: handle eq health heartbeat check Add mechanism for fw eq health check. this will be done using two flows: using the heartbeat mechanism and raising a dedicated interrupt to indicate an eq failure like EQ full. 
This patch will add implementation for the eq heartbeat for gaudi2 asic. More info about the heartbeat mechanism: Expand the heartbeat mechanism to monitor a new event that will be sent from FW upon receiving heartbeat message. that way driver can know that the eq is working or not. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/device.c | 37 ++++++++++++++++++++-- drivers/accel/habanalabs/common/habanalabs.h | 2 ++ drivers/accel/habanalabs/gaudi2/gaudi2.c | 10 ++++++ .../include/gaudi2/gaudi2_async_ids_map_extended.h | 14 ++++---- include/linux/habanalabs/cpucp_if.h | 14 +++++++- 5 files changed, 68 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index e217ee6d1768..1d1ccd8d5c75 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -989,6 +989,25 @@ static bool is_pci_link_healthy(struct hl_device *hdev) return (vendor_id == PCI_VENDOR_ID_HABANALABS); } +static void hl_device_eq_heartbeat(struct hl_device *hdev) +{ + u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; + struct asic_fixed_properties *prop = &hdev->asic_prop; + + /* + * This feature supported in FW version 1.12.0 45.2.0 and above, + * only on those FW versions eq_health_check_supported will be set. + * Start checking eq health only after driver has enabled events from FW. 
+ */ + if (!prop->cpucp_info.eq_health_check_supported || !hdev->init_done) + return; + + if (hdev->eq_heartbeat_received) + hdev->eq_heartbeat_received = false; + else + hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask); +} + static void hl_device_heartbeat(struct work_struct *work) { struct hl_device *hdev = container_of(work, struct hl_device, @@ -999,6 +1018,12 @@ static void hl_device_heartbeat(struct work_struct *work) if (!hl_device_operational(hdev, NULL)) goto reschedule; + /* + * For EQ health check need to check if driver received the heartbeat eq event + * in order to validate the eq is working. + */ + hl_device_eq_heartbeat(hdev); + if (!hdev->asic_funcs->send_heartbeat(hdev)) goto reschedule; @@ -1055,7 +1080,15 @@ static int device_late_init(struct hl_device *hdev) hdev->high_pll = hdev->asic_prop.high_pll; if (hdev->heartbeat) { + /* + * Before scheduling the heartbeat driver will check if eq event has received. + * for the first schedule we need to set the indication as true then for the next + * one this indication will be true only if eq event was sent by FW. + */ + hdev->eq_heartbeat_received = true; + INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); + schedule_delayed_work(&hdev->work_heartbeat, usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); } @@ -2235,8 +2268,6 @@ int hl_device_init(struct hl_device *hdev) "Successfully added device %s to habanalabs driver\n", dev_name(&(hdev)->pdev->dev)); - hdev->init_done = true; - /* After initialization is done, we are ready to receive events from * the F/W. 
We can't do it before because we will ignore events and if * those events are fatal, we won't know about it and the device will @@ -2244,6 +2275,8 @@ int hl_device_init(struct hl_device *hdev) */ hdev->asic_funcs->enable_events_from_fw(hdev); + hdev->init_done = true; + return 0; cb_pool_fini: diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index f8c597903cac..e5b416852996 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3314,6 +3314,7 @@ struct hl_reset_info { * device. * @supports_ctx_switch: true if a ctx switch is required upon first submission. * @support_preboot_binning: true if we support read binning info from preboot. + * @eq_heartbeat_received: indication that eq heartbeat event has received from FW. * @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing. * @fw_components: Controls which f/w components to load to the device. There are multiple f/w * stages and sometimes we want to stop at a certain stage. Used only for testing. @@ -3474,6 +3475,7 @@ struct hl_device { u8 reset_upon_device_release; u8 supports_ctx_switch; u8 support_preboot_binning; + u8 eq_heartbeat_received; /* Parameters for bring-up to be upstreamed */ u64 nic_ports_mask; diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 677900e18519..e507847bf460 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -7804,6 +7804,7 @@ static inline bool is_info_event(u32 event) * an indication to an error. */ case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... 
GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1: + case GAUDI2_EVENT_ARC_EQ_HEARTBEAT: return true; default: return false; @@ -9765,6 +9766,11 @@ static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type) return U16_MAX; } +static void hl_eq_heartbeat_event_handle(struct hl_device *hdev) +{ + hdev->eq_heartbeat_received = true; +} + static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi2_device *gaudi2 = hdev->asic_specific; @@ -10190,6 +10196,10 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent gaudi2_irq_map_table[event_type].name); break; + case GAUDI2_EVENT_ARC_EQ_HEARTBEAT: + hl_eq_heartbeat_event_handle(hdev); + error_count = GAUDI2_NA_EVENT_CAUSE; + break; default: if (gaudi2_irq_map_table[event_type].valid) { dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h index 6cb0f615fc3e..57e661771b6c 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h @@ -2674,17 +2674,19 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { { .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, .name = "DEV_RESET_REQ" }, { .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "PWR_BRK_ENTRY" }, + .name = "ARC_PWR_BRK_ENTRY" }, { .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "PWR_BRK_EXT" }, + .name = "ARC_PWR_BRK_EXT" }, { .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "PWR_RD_MODE0" }, + .name = "ARC_PWR_RD_MODE0" }, { .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "PWR_RD_MODE1" }, + .name = 
"ARC_PWR_RD_MODE1" }, { .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "PWR_RD_MODE2" }, + .name = "ARC_PWR_RD_MODE2" }, { .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, - .name = "PWR_RD_MODE3" }, + .name = "ARC_PWR_RD_MODE3" }, + { .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "ARC_EQ_HEARTBEAT" }, }; #endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */ diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 4cdedb603ecb..a18fa81aad1f 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -33,6 +33,17 @@ #define PLL_MAP_MAX_BITS 128 #define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) +enum eq_event_id { + EQ_EVENT_NIC_STS_REQUEST = 0, + EQ_EVENT_PWR_MODE_0, + EQ_EVENT_PWR_MODE_1, + EQ_EVENT_PWR_MODE_2, + EQ_EVENT_PWR_MODE_3, + EQ_EVENT_PWR_BRK_ENTRY, + EQ_EVENT_PWR_BRK_EXIT, + EQ_EVENT_HEARTBEAT, +}; + /* * info of the pkt queue pointers in the first async occurrence */ @@ -1143,6 +1154,7 @@ struct cpucp_security_info { * (0 = functional 1 = binned) * @interposer_version: Interposer version programmed in eFuse * @substrate_version: Substrate version programmed in eFuse + * @eq_health_check_supported: eq health check feature supported in FW. * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. 
* @fw_os_version: Firmware OS Version */ @@ -1169,7 +1181,7 @@ struct cpucp_info { __u8 xbar_binning_mask; __u8 interposer_version; __u8 substrate_version; - __u8 reserved2; + __u8 eq_health_check_supported; struct cpucp_security_info sec_info; __le32 fw_hbm_region_size; __u8 pll_map[PLL_MAP_LEN]; -- cgit v1.2.3 From 764bfd138f359423b299b7bf3fcbabb56b981ef5 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Wed, 23 Aug 2023 12:36:25 +0300 Subject: accel/habanalabs/gaudi2: add eq health check using irq This is the second patch for applying the eq health check mechanism which will add support for the interrupt flow for gaudi2 asic. More info about the interrupt mechanism: set a dedicated msix for the eq error interrupt, and add interrupt handler for it. when FW detects some issue with EQ like EQ_FULL, it'll raise that interrupt and driver should reset the device. Driver will inform the FW which msix index to use through the already existing handshake mechanism which will send msix info message to fw. 
Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/habanalabs.h | 1 + drivers/accel/habanalabs/common/irq.c | 12 ++++++++++++ drivers/accel/habanalabs/gaudi2/gaudi2.c | 16 ++++++++++++++++ drivers/accel/habanalabs/gaudi2/gaudi2P.h | 1 + include/linux/habanalabs/cpucp_if.h | 1 + 5 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index e5b416852996..6f2cbd3c2e95 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3689,6 +3689,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg); irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg); irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg); irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg); +irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg); u32 hl_cq_inc_ptr(u32 ptr); int hl_asid_init(struct hl_device *hdev); diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c index 10ac100bf9e2..f6b6c54bc868 100644 --- a/drivers/accel/habanalabs/common/irq.c +++ b/drivers/accel/habanalabs/common/irq.c @@ -401,6 +401,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg) return IRQ_HANDLED; } +irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg) +{ + u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; + struct hl_device *hdev = arg; + + dev_err(hdev->dev, "EQ error interrupt received\n"); + + hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask); + + return IRQ_HANDLED; +} + /** * hl_irq_handler_eq - irq handler for event queue * diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index e507847bf460..b0ba62b691ec 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ 
b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -4175,6 +4175,8 @@ static const char *gaudi2_irq_name(u16 irq_number) return "gaudi2 unexpected error"; case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST: return "gaudi2 user completion"; + case GAUDI2_IRQ_NUM_EQ_ERROR: + return "gaudi2 eq error"; default: return "invalid"; } @@ -4317,6 +4319,15 @@ static int gaudi2_enable_msix(struct hl_device *hdev) } } + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR); + rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler, + IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR), + hdev); + if (rc) { + dev_err(hdev->dev, "Failed to request IRQ %d", irq); + goto free_user_irq; + } + gaudi2->hw_cap_initialized |= HW_CAP_MSIX; return 0; @@ -4376,6 +4387,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev) } synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE)); + synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR)); } static void gaudi2_disable_msix(struct hl_device *hdev) @@ -4412,6 +4424,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev) cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; free_irq(irq, cq); + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR); + free_irq(irq, hdev); + pci_free_irq_vectors(hdev->pdev); gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX; @@ -11345,6 +11360,7 @@ static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 static void gaudi2_get_msi_info(__le32 *table) { table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX); + table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR); } static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h index 4535aa5ab561..14e281fd9895 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h @@ -419,6 +419,7 @@ 
enum gaudi2_irq_num { GAUDI2_IRQ_NUM_NIC_PORT_FIRST, GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1), GAUDI2_IRQ_NUM_TPC_ASSERT, + GAUDI2_IRQ_NUM_EQ_ERROR, GAUDI2_IRQ_NUM_RESERVED_FIRST, GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1), GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT, diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index a18fa81aad1f..84d74c4ee4d3 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1004,6 +1004,7 @@ enum cpucp_msi_type { CPUCP_NIC_PORT5_MSI_TYPE, CPUCP_NIC_PORT7_MSI_TYPE, CPUCP_NIC_PORT9_MSI_TYPE, + CPUCP_EVENT_QUEUE_ERR_MSI_TYPE, CPUCP_NUM_OF_MSI_TYPES }; -- cgit v1.2.3 From 9dca13141332e69fd657873194e77a1960fc9ab2 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 7 Sep 2023 14:43:01 +0300 Subject: accel/habanalabs: add fw status SHUTDOWN_PREP update hl_boot_if.h from specs to include CPU_BOOT_STATUS_FW_SHUTDOWN_PREP Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/linux/habanalabs/hl_boot_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h index 7de8a5786a36..93366d5621fd 100644 --- a/include/linux/habanalabs/hl_boot_if.h +++ b/include/linux/habanalabs/hl_boot_if.h @@ -394,6 +394,8 @@ enum cpu_boot_status { CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, /* Internal Security has been initialized, device can be accessed */ CPU_BOOT_STATUS_SECURITY_READY = 17, + /* FW component is preparing to shutdown and communication with host is not available */ + CPU_BOOT_STATUS_FW_SHUTDOWN_PREP = 18, }; enum kmd_msg { -- cgit v1.2.3 From 7f1cd6fdd5872160da05098a84a94dee5e709e54 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 21 Sep 2023 14:52:01 +0300 Subject: accel/habanalabs: minor 
cosmetics update to cpucp_if.h - Update copyright years - Align comments Signed-off-by: Oded Gabbay Reviewed-by: Ofir Bitton --- include/linux/habanalabs/cpucp_if.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 84d74c4ee4d3..86ea7c63a0d2 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2020-2022 HabanaLabs, Ltd. + * Copyright 2020-2023 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -668,18 +668,15 @@ enum pq_init_status { * Obsolete. * * CPUCP_PACKET_GENERIC_PASSTHROUGH - - * Generic opcode for all firmware info that is only passed to host - * through the LKD, without getting parsed there. + * Generic opcode for all firmware info that is only passed to host + * through the LKD, without getting parsed there. * * CPUCP_PACKET_ACTIVE_STATUS_SET - * LKD sends FW indication whether device is free or in use, this indication is reported * also to the BMC. * - * CPUCP_PACKET_REGISTER_INTERRUPTS - - * Packet to register interrupts indicating LKD is ready to receive events from FW. - * * CPUCP_PACKET_SOFT_RESET - - * Packet to perform soft-reset. + * Packet to perform soft-reset. 
* * CPUCP_PACKET_INTS_REGISTER - * Packet to inform FW that queues have been established and LKD is ready to receive @@ -750,9 +747,9 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED11, /* not used */ CPUCP_PACKET_RESERVED12, /* internal */ CPUCP_PACKET_RESERVED13, /* internal */ - CPUCP_PACKET_SOFT_RESET, /* internal */ - CPUCP_PACKET_INTS_REGISTER, /* internal */ - CPUCP_PACKET_ID_MAX /* must be last */ + CPUCP_PACKET_SOFT_RESET, /* internal */ + CPUCP_PACKET_INTS_REGISTER, /* internal */ + CPUCP_PACKET_ID_MAX /* must be last */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 -- cgit v1.2.3 From 74975b4f2836e3cc10eeb6c38a6da54311e1de5b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 26 Sep 2023 16:59:43 +0200 Subject: gpio: acpi: remove acpi_get_and_request_gpiod() With no more users, we can remove acpi_get_and_request_gpiod(). Signed-off-by: Bartosz Golaszewski Reviewed-by: From: Andy Shevchenko Reviewed-by: Mika Westerberg --- drivers/gpio/gpiolib-acpi.c | 28 ---------------------------- include/linux/gpio/consumer.h | 8 -------- 2 files changed, 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 2ad21f34ee62..25a5f07c8755 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -162,34 +162,6 @@ static struct gpio_desc *acpi_get_gpiod(char *path, unsigned int pin) return gpio_device_get_desc(gdev, pin); } -/** - * acpi_get_and_request_gpiod - Translate ACPI GPIO pin to GPIO descriptor and - * hold a refcount to the GPIO device. - * @path: ACPI GPIO controller full path name, (e.g. "\\_SB.GPO1") - * @pin: ACPI GPIO pin number (0-based, controller-relative) - * @label: Label to pass to gpiod_request() - * - * This function is a simple pass-through to acpi_get_gpiod(), except that - * as it is intended for use outside of the GPIO layer (in a similar fashion to - * gpiod_get_index() for example) it also holds a reference to the GPIO device. 
- */ -struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label) -{ - struct gpio_desc *gpio; - int ret; - - gpio = acpi_get_gpiod(path, pin); - if (IS_ERR(gpio)) - return gpio; - - ret = gpiod_request(gpio, label); - if (ret) - return ERR_PTR(ret); - - return gpio; -} -EXPORT_SYMBOL_GPL(acpi_get_and_request_gpiod); - static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) { struct acpi_gpio_event *event = data; diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 6cc345440a5b..db2dfbae8edb 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -606,8 +606,6 @@ void acpi_dev_remove_driver_gpios(struct acpi_device *adev); int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios); -struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label); - #else /* CONFIG_GPIOLIB && CONFIG_ACPI */ #include @@ -625,12 +623,6 @@ static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, return -ENXIO; } -static inline struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, - char *label) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB && CONFIG_ACPI */ -- cgit v1.2.3 From a0fddaa0b5a587cc8d185f8802fe7e48493c43ed Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 15 Sep 2023 08:22:37 -0700 Subject: rtc: Add API function to return alarm time bound by hardware limit Add rtc_bound_alarmtime() to return the requested alarm timeout bound by the maximum alarm timeout that is supported by a given RTC.
Signed-off-by: Guenter Roeck Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230915152238.1144706-2-linux@roeck-us.net --- include/linux/rtc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 4c0bcbeb1f00..5f8e438a0312 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -225,6 +225,23 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +/** + * rtc_bound_alarmtime() - Return alarm time bound by rtc limit + * @rtc: Pointer to rtc device structure + * @requested: Requested alarm timeout + * + * Return: Alarm timeout bound by maximum alarm time supported by rtc. + */ +static inline ktime_t rtc_bound_alarmtime(struct rtc_device *rtc, + ktime_t requested) +{ + if (rtc->alarm_offset_max && + rtc->alarm_offset_max * MSEC_PER_SEC < ktime_to_ms(requested)) + return ms_to_ktime(rtc->alarm_offset_max * MSEC_PER_SEC); + + return requested; +} + #define devm_rtc_register_device(device) \ __devm_rtc_register_device(THIS_MODULE, device) -- cgit v1.2.3 From e346fb6d774abf1d9a87d39b1e3eef0b7397d154 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sat, 30 Sep 2023 02:00:05 -0300 Subject: xattr: make the xattr array itself const As it is currently declared, the xattr_handler structs are const but the array containing their pointers is not. This patch makes it so that fs modules can place them in .rodata, which makes it harder for accidental/malicious modifications at runtime. 
Signed-off-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20230930050033.41174-2-wedsonaf@gmail.com Signed-off-by: Christian Brauner --- fs/xattr.c | 6 +++--- include/linux/fs.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/xattr.c b/fs/xattr.c index efd4736bc94b..09d927603433 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -56,7 +56,7 @@ strcmp_prefix(const char *a, const char *a_prefix) static const struct xattr_handler * xattr_resolve_name(struct inode *inode, const char **name) { - const struct xattr_handler **handlers = inode->i_sb->s_xattr; + const struct xattr_handler * const *handlers = inode->i_sb->s_xattr; const struct xattr_handler *handler; if (!(inode->i_opflags & IOP_XATTR)) { @@ -162,7 +162,7 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode, int xattr_supports_user_prefix(struct inode *inode) { - const struct xattr_handler **handlers = inode->i_sb->s_xattr; + const struct xattr_handler * const *handlers = inode->i_sb->s_xattr; const struct xattr_handler *handler; if (!(inode->i_opflags & IOP_XATTR)) { @@ -999,7 +999,7 @@ int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name) ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { - const struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr; + const struct xattr_handler *handler, * const *handlers = dentry->d_sb->s_xattr; ssize_t remaining_size = buffer_size; int err = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4aeb3fa11927..bba22e25664d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1206,7 +1206,7 @@ struct super_block { #ifdef CONFIG_SECURITY void *s_security; #endif - const struct xattr_handler **s_xattr; + const struct xattr_handler * const *s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ -- cgit v1.2.3 From 
e01cc1e8c2ad73cebb980878ede5584e0f2688f7 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 25 Sep 2023 16:50:23 +0200 Subject: locking/atomic: Add generic support for sync_try_cmpxchg() and its fallback Provide the generic sync_try_cmpxchg() function from the raw_ prefixed version, also adding explicit instrumentation. The patch amends existing scripts to generate sync_try_cmpxchg() locking primitive and its raw_sync_try_cmpxchg() fallback, while leaving existing macros from the try_cmpxchg() family unchanged. The target can define its own arch_sync_try_cmpxchg() to override the generic version of raw_sync_try_cmpxchg(). This allows the target to generate more optimal assembly than the generic version. Additionally, the patch renames two scripts to better reflect what they really do. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Mark Rutland Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org --- include/linux/atomic/atomic-arch-fallback.h | 15 ++++++++++++- include/linux/atomic/atomic-instrumented.h | 10 ++++++++- scripts/atomic/gen-atomic-fallback.sh | 33 ++++++++++++++++------------- scripts/atomic/gen-atomic-instrumented.sh | 3 ++- 4 files changed, 43 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index b83ef19da13d..5e95faa959c4 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void); #define raw_sync_cmpxchg arch_sync_cmpxchg +#ifdef arch_sync_try_cmpxchg +#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg +#else +#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \
+}) +#endif + /** * raw_atomic_read() - atomic load with relaxed ordering * @v: pointer to atomic_t @@ -4649,4 +4662,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 2fdd6702823fa842f9cea57a002e6e4476ae780c +// eec048affea735b8464f58e6d96992101f8f85f1 diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index d401b406ef7c..54d7bbe0aeaa 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -4998,6 +4998,14 @@ atomic_long_dec_if_positive(atomic_long_t *v) raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) +#define sync_try_cmpxchg(ptr, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ + instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ + raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \ +}) + #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 1568f875fef72097413caab8339120c065a39aa4 +// 2cc4bc990fef44d3836ec108f11b610f3f438184 diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh index a45154cefa48..f80d69cfeb1f 100755 --- a/scripts/atomic/gen-atomic-fallback.sh +++ b/scripts/atomic/gen-atomic-fallback.sh @@ -223,14 +223,15 @@ gen_xchg_fallbacks() gen_try_cmpxchg_fallback() { + local prefix="$1"; shift local cmpxchg="$1"; shift; - local order="$1"; shift; + local suffix="$1"; shift; cat < Date: Tue, 10 Oct 2023 09:31:38 +0100 Subject: sched/numa: Document vma_numab_state fields Document the intended usage of the fields. [ mingo: Reformatted to take less vertical space & tidied it up. 
] Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-2-mgorman@techsingularity.net --- include/linux/mm_types.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 36c5b43999e6..d7f042ec1f33 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -551,8 +551,29 @@ struct vma_lock { }; struct vma_numab_state { + /* + * Initialised as time in 'jiffies' after which VMA + * should be scanned. Delays first scan of new VMA by at + * least sysctl_numa_balancing_scan_delay: + */ unsigned long next_scan; + + /* + * Time in jiffies when access_pids[] is reset to + * detect phase change behaviour: + */ unsigned long next_pid_reset; + + /* + * Approximate tracking of PIDs that trapped a NUMA hinting + * fault. May produce false positives due to hash collisions. + * + * [0] Previous PID tracking + * [1] Current PID tracking + * + * Window moves after next_pid_reset has expired approximately + * every VMA_PID_RESET_PERIOD jiffies: + */ unsigned long access_pids[2]; }; -- cgit v1.2.3 From f3a6c97940fbd25d6c84c2d5642338fc99a9b35b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:39 +0100 Subject: sched/numa: Rename vma_numab_state::access_pids[] => ::pids_active[], ::next_pid_reset => ::pids_active_reset The access_pids[] field name is somewhat ambiguous as no PIDs are accessed. Similarly, it's not clear that next_pid_reset is related to access_pids[]. Rename the fields to more accurately reflect their purpose. [ mingo: Rename in the comments too. 
] Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-3-mgorman@techsingularity.net --- include/linux/mm.h | 4 ++-- include/linux/mm_types.h | 6 +++--- kernel/sched/fair.c | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bf5d0b1b16f4..19fc73b02c9f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1726,8 +1726,8 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) unsigned int pid_bit; pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG)); - if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) { - __set_bit(pid_bit, &vma->numab_state->access_pids[1]); + if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) { + __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } #else /* !CONFIG_NUMA_BALANCING */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d7f042ec1f33..e7571eca1131 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -559,10 +559,10 @@ struct vma_numab_state { unsigned long next_scan; /* - * Time in jiffies when access_pids[] is reset to + * Time in jiffies when pids_active[] is reset to * detect phase change behaviour: */ - unsigned long next_pid_reset; + unsigned long pids_active_reset; /* * Approximate tracking of PIDs that trapped a NUMA hinting @@ -574,7 +574,7 @@ struct vma_numab_state { * Window moves after next_pid_reset has expired approximately * every VMA_PID_RESET_PERIOD jiffies: */ - unsigned long access_pids[2]; + unsigned long pids_active[2]; }; /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e7c1bafc0460..6b47edcbe834 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3125,7 +3125,7 @@ static bool vma_is_accessed(struct vm_area_struct *vma) if (READ_ONCE(current->mm->numa_scan_seq) < 2) return true; - pids = 
vma->numab_state->access_pids[0] | vma->numab_state->access_pids[1]; + pids = vma->numab_state->pids_active[0] | vma->numab_state->pids_active[1]; return test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids); } @@ -3241,7 +3241,7 @@ static void task_numa_work(struct callback_head *work) msecs_to_jiffies(sysctl_numa_balancing_scan_delay); /* Reset happens after 4 times scan delay of scan start */ - vma->numab_state->next_pid_reset = vma->numab_state->next_scan + + vma->numab_state->pids_active_reset = vma->numab_state->next_scan + msecs_to_jiffies(VMA_PID_RESET_PERIOD); } @@ -3262,11 +3262,11 @@ static void task_numa_work(struct callback_head *work) * vma for recent access to avoid clearing PID info before access.. */ if (mm->numa_scan_seq && - time_after(jiffies, vma->numab_state->next_pid_reset)) { - vma->numab_state->next_pid_reset = vma->numab_state->next_pid_reset + + time_after(jiffies, vma->numab_state->pids_active_reset)) { + vma->numab_state->pids_active_reset = vma->numab_state->pids_active_reset + msecs_to_jiffies(VMA_PID_RESET_PERIOD); - vma->numab_state->access_pids[0] = READ_ONCE(vma->numab_state->access_pids[1]); - vma->numab_state->access_pids[1] = 0; + vma->numab_state->pids_active[0] = READ_ONCE(vma->numab_state->pids_active[1]); + vma->numab_state->pids_active[1] = 0; } do { -- cgit v1.2.3 From ed2da8b725b932b1e2b2f4835bb664d47ed03031 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:40 +0100 Subject: sched/numa: Trace decisions related to skipping VMAs NUMA balancing skips or scans VMAs for a variety of reasons. In preparation for completing scans of VMAs regardless of PID access, trace the reasons why a VMA was skipped. In a later patch, the tracing will be used to track if a VMA was forcibly scanned. 
Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231010083143.19593-4-mgorman@techsingularity.net --- include/linux/sched/numa_balancing.h | 8 ++++++ include/trace/events/sched.h | 50 ++++++++++++++++++++++++++++++++++++ kernel/sched/fair.c | 17 +++++++++--- 3 files changed, 71 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 3988762efe15..c127a1509e2f 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -15,6 +15,14 @@ #define TNF_FAULT_LOCAL 0x08 #define TNF_MIGRATE_FAIL 0x10 +enum numa_vmaskip_reason { + NUMAB_SKIP_UNSUITABLE, + NUMAB_SKIP_SHARED_RO, + NUMAB_SKIP_INACCESSIBLE, + NUMAB_SKIP_SCAN_DELAY, + NUMAB_SKIP_PID_INACTIVE, +}; + #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index a13d5d06be9d..d82a04d6a1bc 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -664,6 +664,56 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu) ); +#ifdef CONFIG_NUMA_BALANCING +#define NUMAB_SKIP_REASON \ + EM( NUMAB_SKIP_UNSUITABLE, "unsuitable" ) \ + EM( NUMAB_SKIP_SHARED_RO, "shared_ro" ) \ + EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \ + EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \ + EMe(NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) + +/* Redefine for export. */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +NUMAB_SKIP_REASON + +/* Redefine for symbolic printing. 
*/ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + +TRACE_EVENT(sched_skip_vma_numa, + + TP_PROTO(struct mm_struct *mm, struct vm_area_struct *vma, + enum numa_vmaskip_reason reason), + + TP_ARGS(mm, vma, reason), + + TP_STRUCT__entry( + __field(unsigned long, numa_scan_offset) + __field(unsigned long, vm_start) + __field(unsigned long, vm_end) + __field(enum numa_vmaskip_reason, reason) + ), + + TP_fast_assign( + __entry->numa_scan_offset = mm->numa_scan_offset; + __entry->vm_start = vma->vm_start; + __entry->vm_end = vma->vm_end; + __entry->reason = reason; + ), + + TP_printk("numa_scan_offset=%lX vm_start=%lX vm_end=%lX reason=%s", + __entry->numa_scan_offset, + __entry->vm_start, + __entry->vm_end, + __print_symbolic(__entry->reason, NUMAB_SKIP_REASON)) +); +#endif /* CONFIG_NUMA_BALANCING */ /* * Tracepoint for waking a polling cpu without an IPI. diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6b47edcbe834..31cfdb0794fb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3210,6 +3210,7 @@ static void task_numa_work(struct callback_head *work) do { if (!vma_migratable(vma) || !vma_policy_mof(vma) || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_UNSUITABLE); continue; } @@ -3220,15 +3221,19 @@ static void task_numa_work(struct callback_head *work) * as migrating the pages will be of marginal benefit. */ if (!vma->vm_mm || - (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ))) + (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ))) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_SHARED_RO); continue; + } /* * Skip inaccessible VMAs to avoid any confusion between * PROT_NONE and NUMA hinting ptes */ - if (!vma_is_accessible(vma)) + if (!vma_is_accessible(vma)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_INACCESSIBLE); continue; + } /* Initialise new per-VMA NUMAB state. 
*/ if (!vma->numab_state) { @@ -3250,12 +3255,16 @@ static void task_numa_work(struct callback_head *work) * delay the scan for new VMAs. */ if (mm->numa_scan_seq && time_before(jiffies, - vma->numab_state->next_scan)) + vma->numab_state->next_scan)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_SCAN_DELAY); continue; + } /* Do not scan the VMA if task has not accessed */ - if (!vma_is_accessed(vma)) + if (!vma_is_accessed(vma)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE); continue; + } /* * RESET access PIDs regularly for old VMAs. Resetting after checking -- cgit v1.2.3 From 57ec42b9a1b7e4db4a1c2aa4fcc4eefe6d31bcb8 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 3 Oct 2023 15:53:39 +0800 Subject: i3c: Fix typo "Provisional ID" to "Provisioned ID" The MIPI I3C spec refers to a Provisioned ID, since it is (sometimes) provisioned at device manufacturing. Signed-off-by: Matt Johnston Acked-by: Rob Herring Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20231003075339.197099-1-matt@codeconstruct.com.au Signed-off-by: Alexandre Belloni --- Documentation/ABI/testing/sysfs-bus-i3c | 4 ++-- Documentation/devicetree/bindings/i3c/i3c.yaml | 4 ++-- Documentation/driver-api/i3c/protocol.rst | 4 ++-- drivers/i3c/master/svc-i3c-master.c | 2 +- include/linux/i3c/device.h | 2 +- include/linux/i3c/master.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-i3c b/Documentation/ABI/testing/sysfs-bus-i3c index 1f4a2662335b..e5248fd67a56 100644 --- a/Documentation/ABI/testing/sysfs-bus-i3c +++ b/Documentation/ABI/testing/sysfs-bus-i3c @@ -67,7 +67,7 @@ What: /sys/bus/i3c/devices/i3c-/pid KernelVersion: 5.0 Contact: linux-i3c@vger.kernel.org Description: - PID stands for Provisional ID and is used to uniquely identify + PID stands for Provisioned ID and is used to uniquely identify a device on a bus. 
This PID contains information about the vendor, the part and an instance ID so that several devices of the same type can be connected on the same bus. @@ -123,7 +123,7 @@ What: /sys/bus/i3c/devices/i3c-/-/pid KernelVersion: 5.0 Contact: linux-i3c@vger.kernel.org Description: - PID stands for Provisional ID and is used to uniquely identify + PID stands for Provisioned ID and is used to uniquely identify a device on a bus. This PID contains information about the vendor, the part and an instance ID so that several devices of the same type can be connected on the same bus. diff --git a/Documentation/devicetree/bindings/i3c/i3c.yaml b/Documentation/devicetree/bindings/i3c/i3c.yaml index ab69f4115de4..f8ac7a3e3123 100644 --- a/Documentation/devicetree/bindings/i3c/i3c.yaml +++ b/Documentation/devicetree/bindings/i3c/i3c.yaml @@ -119,12 +119,12 @@ patternProperties: minimum: 0 maximum: 0x7f - description: | - First half of the Provisional ID (following the PID + First half of the Provisioned ID (following the PID definition provided by the I3C specification). Contains the manufacturer ID left-shifted by 1. - description: | - Second half of the Provisional ID (following the PID + Second half of the Provisioned ID (following the PID definition provided by the I3C specification). Contains the ORing of the part ID left-shifted by 16, diff --git a/Documentation/driver-api/i3c/protocol.rst b/Documentation/driver-api/i3c/protocol.rst index 02653defa011..23a0b93c62b1 100644 --- a/Documentation/driver-api/i3c/protocol.rst +++ b/Documentation/driver-api/i3c/protocol.rst @@ -71,8 +71,8 @@ During DAA, each I3C device reports 3 important things: related capabilities * DCR: Device Characteristic Register. This 8-bit register describes the functionalities provided by the device -* Provisional ID: A 48-bit unique identifier. On a given bus there should be no - Provisional ID collision, otherwise the discovery mechanism may fail. +* Provisioned ID: A 48-bit unique identifier. 
On a given bus there should be no + Provisioned ID collision, otherwise the discovery mechanism may fail. I3C slave events ================ diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 32eca2d6caf0..e23d7900c5a1 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -765,7 +765,7 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master, u8 data[6]; /* - * We only care about the 48-bit provisional ID yet to + * We only care about the 48-bit provisioned ID yet to * be sure a device does not nack an address twice. * Otherwise, we would just need to flush the RX FIFO. */ diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 90fa83464f00..84ed77c04940 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -96,7 +96,7 @@ enum i3c_dcr { /** * struct i3c_device_info - I3C device information - * @pid: Provisional ID + * @pid: Provisioned ID * @bcr: Bus Characteristic Register * @dcr: Device Characteristic Register * @static_addr: static/I2C address diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 0b52da4f2346..4fd6a777150f 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -166,7 +166,7 @@ struct i3c_device_ibi_info { * assigned a dynamic address by the master. Will be used during * bus initialization to assign it a specific dynamic address * before starting DAA (Dynamic Address Assignment) - * @pid: I3C Provisional ID exposed by the device. This is a unique identifier + * @pid: I3C Provisioned ID exposed by the device. 
This is a unique identifier * that may be used to attach boardinfo to i3c_dev_desc when the device * does not have a static address * @of_node: optional DT node in case the device has been described in the DT -- cgit v1.2.3 From 295d3c441226d004d1ed59c4fcf62d5dba18d9e1 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sat, 30 Sep 2023 02:00:33 -0300 Subject: net: move sockfs_xattr_handlers to .rodata This makes it harder for accidental or malicious changes to sockfs_xattr_handlers at runtime. Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Richard Cochran Cc: netdev@vger.kernel.org Signed-off-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20230930050033.41174-30-wedsonaf@gmail.com Acked-by: Jakub Kicinski Signed-off-by: Christian Brauner --- include/linux/pseudo_fs.h | 2 +- net/socket.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index eceda1d1407a..730f77381d55 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,7 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; - const struct xattr_handler **xattr; + const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; }; diff --git a/net/socket.c b/net/socket.c index c8b08b32f097..be301d523679 100644 --- a/net/socket.c +++ b/net/socket.c @@ -403,7 +403,7 @@ static const struct xattr_handler sockfs_security_xattr_handler = { .set = sockfs_security_xattr_set, }; -static const struct xattr_handler *sockfs_xattr_handlers[] = { +static const struct xattr_handler * const sockfs_xattr_handlers[] = { &sockfs_xattr_handler, &sockfs_security_xattr_handler, NULL -- cgit v1.2.3 From 909f4abd1097769d024c3a9c2e59c2fbe5d2d0c0 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 28 Sep 2023 00:15:23 -0700 Subject: iommu: Add new iommu op to create domains owned by userspace Introduce a new 
iommu_domain op to create domains owned by userspace, e.g. through IOMMUFD. These domains have a few different properties compares to kernel owned domains: - They may be PAGING domains, but created with special parameters. For instance aperture size changes/number of levels, different IOPTE formats, or other things necessary to make a vIOMMU work - We have to track all the memory allocations with GFP_KERNEL_ACCOUNT to make the cgroup sandbox stronger - Device-specialty domains, such as NESTED domains can be created by IOMMUFD. The new op clearly says the domain is being created by IOMMUFD, that the domain is intended for userspace use, and it provides a way to pass user flags or a driver specific uAPI structure to customize the created domain to exactly what the vIOMMU userspace driver requires. iommu drivers that cannot support VFIO/IOMMUFD should not support this op. This includes any driver that cannot provide a fully functional PAGING domain. This new op for now is only supposed to be used by IOMMUFD, hence no wrapper for it. IOMMUFD would call the callback directly. As for domain free, IOMMUFD would use iommu_domain_free(). Link: https://lore.kernel.org/r/20230928071528.26258-2-yi.l.liu@intel.com Suggested-by: Jason Gunthorpe Signed-off-by: Lu Baolu Co-developed-by: Nicolin Chen Signed-off-by: Nicolin Chen Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c50a769d569a..3861d66b65c1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -234,7 +234,15 @@ struct iommu_iotlb_gather { * op is allocated in the iommu driver and freed by the caller after * use. The information type is one of enum iommu_hw_info_type defined * in include/uapi/linux/iommufd.h. 
- * @domain_alloc: allocate iommu domain + * @domain_alloc: allocate and return an iommu domain if success. Otherwise + * NULL is returned. The domain is not fully initialized until + * the caller iommu_domain_alloc() returns. + * @domain_alloc_user: Allocate an iommu domain corresponding to the input + * parameters as defined in include/uapi/linux/iommufd.h. + * Unlike @domain_alloc, it is called only by IOMMUFD and + * must fully initialize the new domain before return. + * Upon success, a domain is returned. Upon failure, + * ERR_PTR must be returned. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU @@ -267,6 +275,7 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); + struct iommu_domain *(*domain_alloc_user)(struct device *dev, u32 flags); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); -- cgit v1.2.3 From b7a5b537c55c088d891ae554103d1b281abef781 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:42 +0100 Subject: sched/numa: Complete scanning of partial VMAs regardless of PID activity NUMA Balancing skips VMAs when the current task has not trapped a NUMA fault within the VMA. If the VMA is skipped then mm->numa_scan_offset advances and a task that is trapping faults within the VMA may never fully update PTEs within the VMA. Force tasks to update PTEs for partially scanned PTEs. The VMA will be tagged for NUMA hints by some task but this removes some of the benefit of tracking PID activity within a VMA. A follow-on patch will mitigate this problem. The test cases and machines evaluated did not trigger the corner case so the performance results are neutral with only small changes within the noise from normal test-to-test variance. 
However, the next patch makes the corner case easier to trigger. Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Tested-by: Raghavendra K T Link: https://lore.kernel.org/r/20231010083143.19593-6-mgorman@techsingularity.net --- include/linux/sched/numa_balancing.h | 1 + include/trace/events/sched.h | 3 ++- kernel/sched/fair.c | 18 +++++++++++++++--- 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index c127a1509e2f..7dcc0bdfddbb 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -21,6 +21,7 @@ enum numa_vmaskip_reason { NUMAB_SKIP_INACCESSIBLE, NUMAB_SKIP_SCAN_DELAY, NUMAB_SKIP_PID_INACTIVE, + NUMAB_SKIP_IGNORE_PID, }; #ifdef CONFIG_NUMA_BALANCING diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index d82a04d6a1bc..bfc07c10541a 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -670,7 +670,8 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, EM( NUMAB_SKIP_SHARED_RO, "shared_ro" ) \ EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \ EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \ - EMe(NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) + EM( NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) \ + EMe(NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) /* Redefine for export. 
*/ #undef EM diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ce36969625bd..ab79013f6e91 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3113,7 +3113,7 @@ static void reset_ptenuma_scan(struct task_struct *p) p->mm->numa_scan_offset = 0; } -static bool vma_is_accessed(struct vm_area_struct *vma) +static bool vma_is_accessed(struct mm_struct *mm, struct vm_area_struct *vma) { unsigned long pids; /* @@ -3126,7 +3126,19 @@ static bool vma_is_accessed(struct vm_area_struct *vma) return true; pids = vma->numab_state->pids_active[0] | vma->numab_state->pids_active[1]; - return test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids); + if (test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids)) + return true; + + /* + * Complete a scan that has already started regardless of PID access, or + * some VMAs may never be scanned in multi-threaded applications: + */ + if (mm->numa_scan_offset > vma->vm_start) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_IGNORE_PID); + return true; + } + + return false; } #define VMA_PID_RESET_PERIOD (4 * sysctl_numa_balancing_scan_delay) @@ -3270,7 +3282,7 @@ static void task_numa_work(struct callback_head *work) } /* Do not scan the VMA if task has not accessed */ - if (!vma_is_accessed(vma)) { + if (!vma_is_accessed(mm, vma)) { trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE); continue; } -- cgit v1.2.3 From f169c62ff7cd1acf8bac8ae17bfeafa307d9e6fa Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 10 Oct 2023 09:31:43 +0100 Subject: sched/numa: Complete scanning of inactive VMAs when there is no alternative VMAs are skipped if there is no recent fault activity but this represents a chicken-and-egg problem as there may be no fault activity if the PTEs are never updated to trap NUMA hints. There is an indirect reliance on scanning to be forced early in the lifetime of a task but this may fail to detect changes in phase behaviour. 
Force inactive VMAs to be scanned when all other eligible VMAs have been updated within the same scan sequence. Test results in general look good with some changes in performance, both negative and positive, depending on whether the additional scanning and faulting was beneficial or not to the workload. The autonuma benchmark workload NUMA01_THREADLOCAL was picked for closer examination. The workload creates two processes with numerous threads and thread-local storage that is zero-filled in a loop. It exercises the corner case where unrelated threads may skip VMAs that are thread-local to another thread and still has some VMAs that inactive while the workload executes. The VMA skipping activity frequency with and without the patch: 6.6.0-rc2-sched-numabtrace-v1 ============================= 649 reason=scan_delay 9,094 reason=unsuitable 48,915 reason=shared_ro 143,919 reason=inaccessible 193,050 reason=pid_inactive 6.6.0-rc2-sched-numabselective-v1 ============================= 146 reason=seq_completed 622 reason=ignore_pid_inactive 624 reason=scan_delay 6,570 reason=unsuitable 16,101 reason=shared_ro 27,608 reason=inaccessible 41,939 reason=pid_inactive Note that with the patch applied, the PID activity is ignored (ignore_pid_inactive) to ensure a VMA with some activity is completely scanned. In addition, a small number of VMAs are scanned when no other eligible VMA is available during a single scan window (seq_completed). The number of times a VMA is skipped due to no PID activity from the scanning task (pid_inactive) drops dramatically. It is expected that this will increase the number of PTEs updated for NUMA hinting faults as well as hinting faults but these represent PTEs that would otherwise have been missed. The tradeoff is scan+fault overhead versus improving locality due to migration. 
On a 2-socket Cascade Lake test machine, the time to complete the workload is as follows; 6.6.0-rc2 6.6.0-rc2 sched-numabtrace-v1 sched-numabselective-v1 Min elsp-NUMA01_THREADLOCAL 174.22 ( 0.00%) 117.64 ( 32.48%) Amean elsp-NUMA01_THREADLOCAL 175.68 ( 0.00%) 123.34 * 29.79%* Stddev elsp-NUMA01_THREADLOCAL 1.20 ( 0.00%) 4.06 (-238.20%) CoeffVar elsp-NUMA01_THREADLOCAL 0.68 ( 0.00%) 3.29 (-381.70%) Max elsp-NUMA01_THREADLOCAL 177.18 ( 0.00%) 128.03 ( 27.74%) The time to complete the workload is reduced by almost 30%: 6.6.0-rc2 6.6.0-rc2 sched-numabtrace-v1 sched-numabselective-v1 / Duration User 91201.80 63506.64 Duration System 2015.53 1819.78 Duration Elapsed 1234.77 868.37 In this specific case, system CPU time was not increased but it's not universally true. From vmstat, the NUMA scanning and fault activity is as follows; 6.6.0-rc2 6.6.0-rc2 sched-numabtrace-v1 sched-numabselective-v1 Ops NUMA base-page range updates 64272.00 26374386.00 Ops NUMA PTE updates 36624.00 55538.00 Ops NUMA PMD updates 54.00 51404.00 Ops NUMA hint faults 15504.00 75786.00 Ops NUMA hint local faults % 14860.00 56763.00 Ops NUMA hint local percent 95.85 74.90 Ops NUMA pages migrated 1629.00 6469222.00 Both the number of PTE updates and hint faults is dramatically increased. While this is superficially unfortunate, it represents ranges that were simply skipped without the patch. As a result of the scanning and hinting faults, many more pages were also migrated but as the time to completion is reduced, the overhead is offset by the gain. 
Signed-off-by: Mel Gorman Signed-off-by: Ingo Molnar Tested-by: Raghavendra K T Link: https://lore.kernel.org/r/20231010083143.19593-7-mgorman@techsingularity.net --- include/linux/mm_types.h | 6 ++++ include/linux/sched/numa_balancing.h | 1 + include/trace/events/sched.h | 3 +- kernel/sched/fair.c | 55 ++++++++++++++++++++++++++++++++++-- 4 files changed, 61 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e7571eca1131..589f31ef2e84 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -575,6 +575,12 @@ struct vma_numab_state { * every VMA_PID_RESET_PERIOD jiffies: */ unsigned long pids_active[2]; + + /* + * MM scan sequence ID when the VMA was last completely scanned. + * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq + */ + int prev_scan_seq; }; /* diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 7dcc0bdfddbb..b69afb8630db 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -22,6 +22,7 @@ enum numa_vmaskip_reason { NUMAB_SKIP_SCAN_DELAY, NUMAB_SKIP_PID_INACTIVE, NUMAB_SKIP_IGNORE_PID, + NUMAB_SKIP_SEQ_COMPLETED, }; #ifdef CONFIG_NUMA_BALANCING diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index bfc07c10541a..6188ad0d9e0d 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -671,7 +671,8 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \ EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \ EM( NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) \ - EMe(NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) + EM( NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) \ + EMe(NUMAB_SKIP_SEQ_COMPLETED, "seq_completed" ) /* Redefine for export. 
*/ #undef EM diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ab79013f6e91..922905194c0c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3158,6 +3158,8 @@ static void task_numa_work(struct callback_head *work) unsigned long nr_pte_updates = 0; long pages, virtpages; struct vma_iterator vmi; + bool vma_pids_skipped; + bool vma_pids_forced = false; SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work)); @@ -3200,7 +3202,6 @@ static void task_numa_work(struct callback_head *work) */ p->node_stamp += 2 * TICK_NSEC; - start = mm->numa_scan_offset; pages = sysctl_numa_balancing_scan_size; pages <<= 20 - PAGE_SHIFT; /* MB in pages */ virtpages = pages * 8; /* Scan up to this much virtual space */ @@ -3210,6 +3211,16 @@ static void task_numa_work(struct callback_head *work) if (!mmap_read_trylock(mm)) return; + + /* + * VMAs are skipped if the current PID has not trapped a fault within + * the VMA recently. Allow scanning to be forced if there is no + * suitable VMA remaining. + */ + vma_pids_skipped = false; + +retry_pids: + start = mm->numa_scan_offset; vma_iter_init(&vmi, mm, start); vma = vma_next(&vmi); if (!vma) { @@ -3260,6 +3271,13 @@ static void task_numa_work(struct callback_head *work) /* Reset happens after 4 times scan delay of scan start */ vma->numab_state->pids_active_reset = vma->numab_state->next_scan + msecs_to_jiffies(VMA_PID_RESET_PERIOD); + + /* + * Ensure prev_scan_seq does not match numa_scan_seq, + * to prevent VMAs being skipped prematurely on the + * first scan: + */ + vma->numab_state->prev_scan_seq = mm->numa_scan_seq - 1; } /* @@ -3281,8 +3299,19 @@ static void task_numa_work(struct callback_head *work) vma->numab_state->pids_active[1] = 0; } - /* Do not scan the VMA if task has not accessed */ - if (!vma_is_accessed(mm, vma)) { + /* Do not rescan VMAs twice within the same sequence. 
*/ + if (vma->numab_state->prev_scan_seq == mm->numa_scan_seq) { + mm->numa_scan_offset = vma->vm_end; + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_SEQ_COMPLETED); + continue; + } + + /* + * Do not scan the VMA if task has not accessed it, unless no other + * VMA candidate exists. + */ + if (!vma_pids_forced && !vma_is_accessed(mm, vma)) { + vma_pids_skipped = true; trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_PID_INACTIVE); continue; } @@ -3311,8 +3340,28 @@ static void task_numa_work(struct callback_head *work) cond_resched(); } while (end != vma->vm_end); + + /* VMA scan is complete, do not scan until next sequence. */ + vma->numab_state->prev_scan_seq = mm->numa_scan_seq; + + /* + * Only force scan within one VMA at a time, to limit the + * cost of scanning a potentially uninteresting VMA. + */ + if (vma_pids_forced) + break; } for_each_vma(vmi, vma); + /* + * If no VMAs are remaining and VMAs were skipped due to the PID + * not accessing the VMA previously, then force a scan to ensure + * forward progress: + */ + if (!vma && !vma_pids_forced && vma_pids_skipped) { + vma_pids_forced = true; + goto retry_pids; + } + out: /* * It is possible to reach the end of the VMA list but the last few -- cgit v1.2.3 From 8527ca7735ef4cdad32c45853b0138f46ab2df58 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Oct 2023 14:41:21 -0700 Subject: net: skbuff: fix kernel-doc typos Correct punctuation and drop an extraneous word. Signed-off-by: Randy Dunlap Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231008214121.25940-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4174c4b82d13..97bfef071255 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1309,7 +1309,7 @@ struct sk_buff_fclones { * * Returns true if skb is a fast clone, and its clone is not freed. 
* Some drivers call skb_orphan() in their ndo_start_xmit(), - so we also check that this didnt happen. + so we also check that didn't happen. */ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) @@ -2016,7 +2016,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) * Copy shared buffers into a new sk_buff. We effectively do COW on * packets to handle cases where we have a local reader and forward * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. + * a packet that's being forwarded. */ /** -- cgit v1.2.3 From 5247dbf16cee4e83eb89e4d3b87bd5e79c5d1655 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Mon, 9 Oct 2023 19:16:33 +0800 Subject: net/core: Introduce netdev_core_stats_inc() Although there is a kfree_skb_reason() helper function that can be used to find the reason why this skb is dropped, but most callers didn't increase one of rx_dropped, tx_dropped, rx_nohandler and rx_otherhost_dropped. For the users, people are more concerned about why the dropped in ip is increasing. Introduce netdev_core_stats_inc() for trace the caller of dev_core_stats_*_inc(). Also, add __cold to netdev_core_stats_alloc(), as it's called with small probability. And add noinline make sure netdev_core_stats_inc was never inlined. Signed-off-by: Yajun Deng Suggested-by: Alexander Lobakin Reviewed-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 21 ++++----------------- net/core/dev.c | 21 +++++++++++++++++++-- 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e070a4540fba..11d704bfec9b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4002,32 +4002,19 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev, return false; } -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev); - -static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev) -{ - /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ - struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); - - if (likely(p)) - return p; - - return netdev_core_stats_alloc(dev); -} +void netdev_core_stats_inc(struct net_device *dev, u32 offset); #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats __percpu *p; \ - \ - p = dev_core_stats(dev); \ - if (p) \ - this_cpu_inc(p->FIELD); \ + netdev_core_stats_inc(dev, \ + offsetof(struct net_device_core_stats, FIELD)); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) DEV_CORE_STATS_INC(rx_otherhost_dropped) +#undef DEV_CORE_STATS_INC static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 606a366cc209..02949a929e7f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10497,7 +10497,8 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, } EXPORT_SYMBOL(netdev_stats_to_stats64); -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev) +static __cold struct net_device_core_stats __percpu *netdev_core_stats_alloc( + struct net_device *dev) { struct 
net_device_core_stats __percpu *p; @@ -10510,7 +10511,23 @@ struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device /* This READ_ONCE() pairs with the cmpxchg() above */ return READ_ONCE(dev->core_stats); } -EXPORT_SYMBOL(netdev_core_stats_alloc); + +noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset) +{ + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ + struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); + unsigned long __percpu *field; + + if (unlikely(!p)) { + p = netdev_core_stats_alloc(dev); + if (!p) + return; + } + + field = (__force unsigned long __percpu *)((__force void *)p + offset); + this_cpu_inc(*field); +} +EXPORT_SYMBOL_GPL(netdev_core_stats_inc); /** * dev_get_stats - get network device statistics -- cgit v1.2.3 From acd6199f195d6de814ac4090ce0864a613b1580e Mon Sep 17 00:00:00 2001 From: Wentong Wu Date: Mon, 9 Oct 2023 14:33:22 +0800 Subject: usb: Add support for Intel LJCA device Implements the USB part of Intel USB-I2C/GPIO/SPI adapter device named "La Jolla Cove Adapter" (LJCA). The communication between the various LJCA module drivers and the hardware will be muxed/demuxed by this driver. Three modules ( I2C, GPIO, and SPI) are supported currently. Each sub-module of LJCA device is identified by type field within the LJCA message header. The sub-modules of LJCA can use ljca_transfer() to issue a transfer between host and hardware. And ljca_register_event_cb is exported to LJCA sub-module drivers for hardware event subscription. 
The minimum code in ASL that covers this board is Scope (\_SB.PCI0.DWC3.RHUB.HS01) { Device (GPIO) { Name (_ADR, Zero) Name (_STA, 0x0F) } Device (I2C) { Name (_ADR, One) Name (_STA, 0x0F) } Device (SPI) { Name (_ADR, 0x02) Name (_STA, 0x0F) } } Signed-off-by: Wentong Wu Reviewed-by: Sakari Ailus Reviewed-by: Andi Shyti Tested-by: Hans de Goede Reviewed-by: Oliver Neukum Link: https://lore.kernel.org/r/1696833205-16716-2-git-send-email-wentong.wu@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/Kconfig | 13 + drivers/usb/misc/Makefile | 1 + drivers/usb/misc/usb-ljca.c | 902 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/ljca.h | 145 +++++++ 4 files changed, 1061 insertions(+) create mode 100644 drivers/usb/misc/usb-ljca.c create mode 100644 include/linux/usb/ljca.h (limited to 'include/linux') diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig index 99b15b77dfd5..c510af7baa0d 100644 --- a/drivers/usb/misc/Kconfig +++ b/drivers/usb/misc/Kconfig @@ -165,6 +165,19 @@ config APPLE_MFI_FASTCHARGE It is safe to say M here. +config USB_LJCA + tristate "Intel La Jolla Cove Adapter support" + select AUXILIARY_BUS + depends on USB && ACPI + help + This adds support for Intel La Jolla Cove USB-I2C/SPI/GPIO + Master Adapter (LJCA). Additional drivers such as I2C_LJCA, + GPIO_LJCA and SPI_LJCA must be enabled in order to use the + functionality of the device. + + This driver can also be built as a module. If so, the module + will be called usb-ljca. 
+ source "drivers/usb/misc/sisusbvga/Kconfig" config USB_LD diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile index 1992cc284d8a..0bc732bcb162 100644 --- a/drivers/usb/misc/Makefile +++ b/drivers/usb/misc/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_USB_EMI26) += emi26.o obj-$(CONFIG_USB_EMI62) += emi62.o obj-$(CONFIG_USB_EZUSB_FX2) += ezusb.o obj-$(CONFIG_APPLE_MFI_FASTCHARGE) += apple-mfi-fastcharge.o +obj-$(CONFIG_USB_LJCA) += usb-ljca.o obj-$(CONFIG_USB_IDMOUSE) += idmouse.o obj-$(CONFIG_USB_IOWARRIOR) += iowarrior.o obj-$(CONFIG_USB_ISIGHTFW) += isight_firmware.o diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c new file mode 100644 index 000000000000..c9decd0396d4 --- /dev/null +++ b/drivers/usb/misc/usb-ljca.c @@ -0,0 +1,902 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel La Jolla Cove Adapter USB driver + * + * Copyright (c) 2023, Intel Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* command flags */ +#define LJCA_ACK_FLAG BIT(0) +#define LJCA_RESP_FLAG BIT(1) +#define LJCA_CMPL_FLAG BIT(2) + +#define LJCA_MAX_PACKET_SIZE 64u +#define LJCA_MAX_PAYLOAD_SIZE \ + (LJCA_MAX_PACKET_SIZE - sizeof(struct ljca_msg)) + +#define LJCA_WRITE_TIMEOUT_MS 200 +#define LJCA_WRITE_ACK_TIMEOUT_MS 500 +#define LJCA_ENUM_CLIENT_TIMEOUT_MS 20 + +/* ljca client type */ +enum ljca_client_type { + LJCA_CLIENT_MNG = 1, + LJCA_CLIENT_GPIO = 3, + LJCA_CLIENT_I2C = 4, + LJCA_CLIENT_SPI = 5, +}; + +/* MNG client commands */ +enum ljca_mng_cmd { + LJCA_MNG_RESET = 2, + LJCA_MNG_ENUM_GPIO = 4, + LJCA_MNG_ENUM_I2C = 5, + LJCA_MNG_ENUM_SPI = 8, +}; + +/* ljca client acpi _ADR */ +enum ljca_client_acpi_adr { + LJCA_GPIO_ACPI_ADR, + LJCA_I2C1_ACPI_ADR, + LJCA_I2C2_ACPI_ADR, + LJCA_SPI1_ACPI_ADR, + LJCA_SPI2_ACPI_ADR, + LJCA_CLIENT_ACPI_ADR_MAX, +}; + +/* ljca cmd message structure */ +struct ljca_msg { + u8 type; + u8 cmd; + u8 flags; + u8 
len; + u8 data[] __counted_by(len); +} __packed; + +struct ljca_i2c_ctr_info { + u8 id; + u8 capacity; + u8 intr_pin; +} __packed; + +struct ljca_i2c_descriptor { + u8 num; + struct ljca_i2c_ctr_info info[] __counted_by(num); +} __packed; + +struct ljca_spi_ctr_info { + u8 id; + u8 capacity; + u8 intr_pin; +} __packed; + +struct ljca_spi_descriptor { + u8 num; + struct ljca_spi_ctr_info info[] __counted_by(num); +} __packed; + +struct ljca_bank_descriptor { + u8 bank_id; + u8 pin_num; + + /* 1 bit for each gpio, 1 means valid */ + __le32 valid_pins; +} __packed; + +struct ljca_gpio_descriptor { + u8 pins_per_bank; + u8 bank_num; + struct ljca_bank_descriptor bank_desc[] __counted_by(bank_num); +} __packed; + +/** + * struct ljca_adapter - represent a ljca adapter + * + * @intf: the usb interface for this ljca adapter + * @usb_dev: the usb device for this ljca adapter + * @dev: the specific device info of the usb interface + * @rx_pipe: bulk in pipe for receive data from firmware + * @tx_pipe: bulk out pipe for send data to firmware + * @rx_urb: urb used for the bulk in pipe + * @rx_buf: buffer used to receive command response and event + * @rx_len: length of rx buffer + * @ex_buf: external buffer to save command response + * @ex_buf_len: length of external buffer + * @actual_length: actual length of data copied to external buffer + * @tx_buf: buffer used to download command to firmware + * @tx_buf_len: length of tx buffer + * @lock: spinlock to protect tx_buf and ex_buf + * @cmd_completion: completion object as the command receives ack + * @mutex: mutex to avoid command download concurrently + * @client_list: client device list + * @disconnect: usb disconnect ongoing or not + * @reset_id: used to reset firmware + */ +struct ljca_adapter { + struct usb_interface *intf; + struct usb_device *usb_dev; + struct device *dev; + + unsigned int rx_pipe; + unsigned int tx_pipe; + + struct urb *rx_urb; + void *rx_buf; + unsigned int rx_len; + + u8 *ex_buf; + u8 ex_buf_len; + 
u8 actual_length; + + void *tx_buf; + u8 tx_buf_len; + + spinlock_t lock; + + struct completion cmd_completion; + struct mutex mutex; + + struct list_head client_list; + + bool disconnect; + + u32 reset_id; +}; + +struct ljca_match_ids_walk_data { + const struct acpi_device_id *ids; + const char *uid; + struct acpi_device *adev; +}; + +static const struct acpi_device_id ljca_gpio_hids[] = { + { "INTC1074" }, + { "INTC1096" }, + { "INTC100B" }, + { "INTC10D1" }, + {}, +}; + +static const struct acpi_device_id ljca_i2c_hids[] = { + { "INTC1075" }, + { "INTC1097" }, + { "INTC100C" }, + { "INTC10D2" }, + {}, +}; + +static const struct acpi_device_id ljca_spi_hids[] = { + { "INTC1091" }, + { "INTC1098" }, + { "INTC100D" }, + { "INTC10D3" }, + {}, +}; + +static void ljca_handle_event(struct ljca_adapter *adap, + struct ljca_msg *header) +{ + struct ljca_client *client; + + list_for_each_entry(client, &adap->client_list, link) { + /* + * Currently only GPIO register event callback, but + * firmware message structure should include id when + * multiple same type clients register event callback. 
+ */ + if (client->type == header->type) { + unsigned long flags; + + spin_lock_irqsave(&client->event_cb_lock, flags); + client->event_cb(client->context, header->cmd, + header->data, header->len); + spin_unlock_irqrestore(&client->event_cb_lock, flags); + + break; + } + } +} + +/* process command ack and received data if available */ +static void ljca_handle_cmd_ack(struct ljca_adapter *adap, struct ljca_msg *header) +{ + struct ljca_msg *tx_header = adap->tx_buf; + u8 ibuf_len, actual_len = 0; + unsigned long flags; + u8 *ibuf; + + spin_lock_irqsave(&adap->lock, flags); + + if (tx_header->type != header->type || tx_header->cmd != header->cmd) { + spin_unlock_irqrestore(&adap->lock, flags); + dev_err(adap->dev, "cmd ack mismatch error\n"); + return; + } + + ibuf_len = adap->ex_buf_len; + ibuf = adap->ex_buf; + + if (ibuf && ibuf_len) { + actual_len = min(header->len, ibuf_len); + + /* copy received data to external buffer */ + memcpy(ibuf, header->data, actual_len); + } + /* update copied data length */ + adap->actual_length = actual_len; + + spin_unlock_irqrestore(&adap->lock, flags); + + complete(&adap->cmd_completion); +} + +static void ljca_recv(struct urb *urb) +{ + struct ljca_msg *header = urb->transfer_buffer; + struct ljca_adapter *adap = urb->context; + int ret; + + switch (urb->status) { + case 0: + /* success */ + break; + case -ENOENT: + /* + * directly complete the possible ongoing transfer + * during disconnect + */ + if (adap->disconnect) + complete(&adap->cmd_completion); + return; + case -ECONNRESET: + case -ESHUTDOWN: + case -EPIPE: + /* rx urb is terminated */ + dev_dbg(adap->dev, "rx urb terminated with status: %d\n", + urb->status); + return; + default: + dev_dbg(adap->dev, "rx urb error: %d\n", urb->status); + goto resubmit; + } + + if (header->len + sizeof(*header) != urb->actual_length) + goto resubmit; + + if (header->flags & LJCA_ACK_FLAG) + ljca_handle_cmd_ack(adap, header); + else + ljca_handle_event(adap, header); + +resubmit: + ret 
= usb_submit_urb(urb, GFP_ATOMIC); + if (ret && ret != -EPERM) + dev_err(adap->dev, "resubmit rx urb error %d\n", ret); +} + +static int ljca_send(struct ljca_adapter *adap, u8 type, u8 cmd, + const u8 *obuf, u8 obuf_len, u8 *ibuf, u8 ibuf_len, + bool ack, unsigned long timeout) +{ + unsigned int msg_len = sizeof(struct ljca_msg) + obuf_len; + struct ljca_msg *header = adap->tx_buf; + unsigned int transferred; + unsigned long flags; + int ret; + + if (adap->disconnect) + return -ENODEV; + + if (msg_len > adap->tx_buf_len) + return -EINVAL; + + mutex_lock(&adap->mutex); + + spin_lock_irqsave(&adap->lock, flags); + + header->type = type; + header->cmd = cmd; + header->len = obuf_len; + if (obuf) + memcpy(header->data, obuf, obuf_len); + + header->flags = LJCA_CMPL_FLAG | (ack ? LJCA_ACK_FLAG : 0); + + adap->ex_buf = ibuf; + adap->ex_buf_len = ibuf_len; + adap->actual_length = 0; + + spin_unlock_irqrestore(&adap->lock, flags); + + reinit_completion(&adap->cmd_completion); + + ret = usb_autopm_get_interface(adap->intf); + if (ret < 0) + goto out; + + ret = usb_bulk_msg(adap->usb_dev, adap->tx_pipe, header, + msg_len, &transferred, LJCA_WRITE_TIMEOUT_MS); + + usb_autopm_put_interface(adap->intf); + + if (ret < 0) + goto out; + if (transferred != msg_len) { + ret = -EIO; + goto out; + } + + if (ack) { + ret = wait_for_completion_timeout(&adap->cmd_completion, + timeout); + if (!ret) { + ret = -ETIMEDOUT; + goto out; + } + } + ret = adap->actual_length; + +out: + spin_lock_irqsave(&adap->lock, flags); + adap->ex_buf = NULL; + adap->ex_buf_len = 0; + + memset(header, 0, sizeof(*header)); + spin_unlock_irqrestore(&adap->lock, flags); + + mutex_unlock(&adap->mutex); + + return ret; +} + +int ljca_transfer(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len, u8 *ibuf, u8 ibuf_len) +{ + return ljca_send(client->adapter, client->type, cmd, + obuf, obuf_len, ibuf, ibuf_len, true, + LJCA_WRITE_ACK_TIMEOUT_MS); +} +EXPORT_SYMBOL_NS_GPL(ljca_transfer, LJCA); + +int 
ljca_transfer_noack(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len) +{ + return ljca_send(client->adapter, client->type, cmd, obuf, + obuf_len, NULL, 0, false, LJCA_WRITE_ACK_TIMEOUT_MS); +} +EXPORT_SYMBOL_NS_GPL(ljca_transfer_noack, LJCA); + +int ljca_register_event_cb(struct ljca_client *client, ljca_event_cb_t event_cb, + void *context) +{ + unsigned long flags; + + if (!event_cb) + return -EINVAL; + + spin_lock_irqsave(&client->event_cb_lock, flags); + + if (client->event_cb) { + spin_unlock_irqrestore(&client->event_cb_lock, flags); + return -EALREADY; + } + + client->event_cb = event_cb; + client->context = context; + + spin_unlock_irqrestore(&client->event_cb_lock, flags); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(ljca_register_event_cb, LJCA); + +void ljca_unregister_event_cb(struct ljca_client *client) +{ + unsigned long flags; + + spin_lock_irqsave(&client->event_cb_lock, flags); + + client->event_cb = NULL; + client->context = NULL; + + spin_unlock_irqrestore(&client->event_cb_lock, flags); +} +EXPORT_SYMBOL_NS_GPL(ljca_unregister_event_cb, LJCA); + +static int ljca_match_device_ids(struct acpi_device *adev, void *data) +{ + struct ljca_match_ids_walk_data *wd = data; + const char *uid = acpi_device_uid(adev); + + if (acpi_match_device_ids(adev, wd->ids)) + return 0; + + if (!wd->uid) + goto match; + + if (!uid) + /* + * Some DSDTs have only one ACPI companion for the two I2C + * controllers and they don't set a UID at all (e.g. Dell + * Latitude 9420). On these platforms only the first I2C + * controller is used, so if a HID match has no UID we use + * "0" as the UID and assign ACPI companion to the first + * I2C controller. 
+ */ + uid = "0"; + else + uid = strchr(uid, wd->uid[0]); + + if (!uid || strcmp(uid, wd->uid)) + return 0; + +match: + wd->adev = adev; + + return 1; +} + +/* bind auxiliary device to acpi device */ +static void ljca_auxdev_acpi_bind(struct ljca_adapter *adap, + struct auxiliary_device *auxdev, + u64 adr, u8 id) +{ + struct ljca_match_ids_walk_data wd = { 0 }; + struct acpi_device *parent, *adev; + struct device *dev = adap->dev; + char uid[4]; + + parent = ACPI_COMPANION(dev); + if (!parent) + return; + + /* + * get auxdev ACPI handle from the ACPI device directly + * under the parent that matches _ADR. + */ + adev = acpi_find_child_device(parent, adr, false); + if (adev) { + ACPI_COMPANION_SET(&auxdev->dev, adev); + return; + } + + /* + * _ADR is a grey area in the ACPI specification, some + * platforms use _HID to distinguish children devices. + */ + switch (adr) { + case LJCA_GPIO_ACPI_ADR: + wd.ids = ljca_gpio_hids; + break; + case LJCA_I2C1_ACPI_ADR: + case LJCA_I2C2_ACPI_ADR: + snprintf(uid, sizeof(uid), "%d", id); + wd.uid = uid; + wd.ids = ljca_i2c_hids; + break; + case LJCA_SPI1_ACPI_ADR: + case LJCA_SPI2_ACPI_ADR: + wd.ids = ljca_spi_hids; + break; + default: + dev_warn(dev, "unsupported _ADR\n"); + return; + } + + acpi_dev_for_each_child(parent, ljca_match_device_ids, &wd); + if (wd.adev) { + ACPI_COMPANION_SET(&auxdev->dev, wd.adev); + return; + } + + parent = ACPI_COMPANION(dev->parent->parent); + if (!parent) + return; + + acpi_dev_for_each_child(parent, ljca_match_device_ids, &wd); + if (wd.adev) + ACPI_COMPANION_SET(&auxdev->dev, wd.adev); +} + +static void ljca_auxdev_release(struct device *dev) +{ + struct auxiliary_device *auxdev = to_auxiliary_dev(dev); + + kfree(auxdev->dev.platform_data); +} + +static int ljca_new_client_device(struct ljca_adapter *adap, u8 type, u8 id, + char *name, void *data, u64 adr) +{ + struct auxiliary_device *auxdev; + struct ljca_client *client; + int ret; + + client = kzalloc(sizeof *client, GFP_KERNEL); + if 
(!client) + return -ENOMEM; + + client->type = type; + client->id = id; + client->adapter = adap; + spin_lock_init(&client->event_cb_lock); + + auxdev = &client->auxdev; + auxdev->name = name; + auxdev->id = id; + + auxdev->dev.parent = adap->dev; + auxdev->dev.platform_data = data; + auxdev->dev.release = ljca_auxdev_release; + + ret = auxiliary_device_init(auxdev); + if (ret) + goto err_free; + + ljca_auxdev_acpi_bind(adap, auxdev, adr, id); + + ret = auxiliary_device_add(auxdev); + if (ret) + goto err_uninit; + + list_add_tail(&client->link, &adap->client_list); + + return 0; + +err_uninit: + auxiliary_device_uninit(auxdev); + +err_free: + kfree(client); + + return ret; +} + +static int ljca_enumerate_gpio(struct ljca_adapter *adap) +{ + u32 valid_pin[LJCA_MAX_GPIO_NUM / BITS_PER_TYPE(u32)]; + struct ljca_gpio_descriptor *desc; + struct ljca_gpio_info *gpio_info; + u8 buf[LJCA_MAX_PAYLOAD_SIZE]; + int ret, gpio_num; + unsigned int i; + + ret = ljca_send(adap, LJCA_CLIENT_MNG, LJCA_MNG_ENUM_GPIO, NULL, 0, buf, + sizeof(buf), true, LJCA_ENUM_CLIENT_TIMEOUT_MS); + if (ret < 0) + return ret; + + /* check firmware response */ + desc = (struct ljca_gpio_descriptor *)buf; + if (ret != struct_size(desc, bank_desc, desc->bank_num)) + return -EINVAL; + + gpio_num = desc->pins_per_bank * desc->bank_num; + if (gpio_num > LJCA_MAX_GPIO_NUM) + return -EINVAL; + + /* construct platform data */ + gpio_info = kzalloc(sizeof *gpio_info, GFP_KERNEL); + if (!gpio_info) + return -ENOMEM; + gpio_info->num = gpio_num; + + for (i = 0; i < desc->bank_num; i++) + valid_pin[i] = get_unaligned_le32(&desc->bank_desc[i].valid_pins); + bitmap_from_arr32(gpio_info->valid_pin_map, valid_pin, gpio_num); + + ret = ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio", + gpio_info, LJCA_GPIO_ACPI_ADR); + if (ret) + kfree(gpio_info); + + return ret; +} + +static int ljca_enumerate_i2c(struct ljca_adapter *adap) +{ + struct ljca_i2c_descriptor *desc; + struct ljca_i2c_info *i2c_info; + u8 
buf[LJCA_MAX_PAYLOAD_SIZE]; + unsigned int i; + int ret; + + ret = ljca_send(adap, LJCA_CLIENT_MNG, LJCA_MNG_ENUM_I2C, NULL, 0, buf, + sizeof(buf), true, LJCA_ENUM_CLIENT_TIMEOUT_MS); + if (ret < 0) + return ret; + + /* check firmware response */ + desc = (struct ljca_i2c_descriptor *)buf; + if (ret != struct_size(desc, info, desc->num)) + return -EINVAL; + + for (i = 0; i < desc->num; i++) { + /* construct platform data */ + i2c_info = kzalloc(sizeof *i2c_info, GFP_KERNEL); + if (!i2c_info) + return -ENOMEM; + + i2c_info->id = desc->info[i].id; + i2c_info->capacity = desc->info[i].capacity; + i2c_info->intr_pin = desc->info[i].intr_pin; + + ret = ljca_new_client_device(adap, LJCA_CLIENT_I2C, i, + "ljca-i2c", i2c_info, + LJCA_I2C1_ACPI_ADR + i); + if (ret) { + kfree(i2c_info); + return ret; + } + } + + return 0; +} + +static int ljca_enumerate_spi(struct ljca_adapter *adap) +{ + struct ljca_spi_descriptor *desc; + struct ljca_spi_info *spi_info; + u8 buf[LJCA_MAX_PAYLOAD_SIZE]; + unsigned int i; + int ret; + + ret = ljca_send(adap, LJCA_CLIENT_MNG, LJCA_MNG_ENUM_SPI, NULL, 0, buf, + sizeof(buf), true, LJCA_ENUM_CLIENT_TIMEOUT_MS); + if (ret < 0) + return ret; + + /* check firmware response */ + desc = (struct ljca_spi_descriptor *)buf; + if (ret != struct_size(desc, info, desc->num)) + return -EINVAL; + + for (i = 0; i < desc->num; i++) { + /* construct platform data */ + spi_info = kzalloc(sizeof *spi_info, GFP_KERNEL); + if (!spi_info) + return -ENOMEM; + + spi_info->id = desc->info[i].id; + spi_info->capacity = desc->info[i].capacity; + + ret = ljca_new_client_device(adap, LJCA_CLIENT_SPI, i, + "ljca-spi", spi_info, + LJCA_SPI1_ACPI_ADR + i); + if (ret) { + kfree(spi_info); + return ret; + } + } + + return 0; +} + +static int ljca_reset_handshake(struct ljca_adapter *adap) +{ + __le32 reset_id = cpu_to_le32(adap->reset_id); + __le32 reset_id_ret = 0; + int ret; + + adap->reset_id++; + + ret = ljca_send(adap, LJCA_CLIENT_MNG, LJCA_MNG_RESET, (u8 *)&reset_id, + 
sizeof(__le32), (u8 *)&reset_id_ret, sizeof(__le32), + true, LJCA_WRITE_ACK_TIMEOUT_MS); + if (ret < 0) + return ret; + + if (reset_id_ret != reset_id) + return -EINVAL; + + return 0; +} + +static int ljca_enumerate_clients(struct ljca_adapter *adap) +{ + struct ljca_client *client, *next; + int ret; + + ret = ljca_reset_handshake(adap); + if (ret) + goto err_kill; + + ret = ljca_enumerate_gpio(adap); + if (ret) { + dev_err(adap->dev, "enumerate GPIO error\n"); + goto err_kill; + } + + ret = ljca_enumerate_i2c(adap); + if (ret) { + dev_err(adap->dev, "enumerate I2C error\n"); + goto err_kill; + } + + ret = ljca_enumerate_spi(adap); + if (ret) { + dev_err(adap->dev, "enumerate SPI error\n"); + goto err_kill; + } + + return 0; + +err_kill: + adap->disconnect = true; + + usb_kill_urb(adap->rx_urb); + + list_for_each_entry_safe_reverse(client, next, &adap->client_list, link) { + auxiliary_device_delete(&client->auxdev); + auxiliary_device_uninit(&client->auxdev); + + list_del_init(&client->link); + kfree(client); + } + + return ret; +} + +static int ljca_probe(struct usb_interface *interface, + const struct usb_device_id *id) +{ + struct usb_device *usb_dev = interface_to_usbdev(interface); + struct usb_host_interface *alt = interface->cur_altsetting; + struct usb_endpoint_descriptor *ep_in, *ep_out; + struct device *dev = &interface->dev; + struct ljca_adapter *adap; + int ret; + + adap = devm_kzalloc(dev, sizeof(*adap), GFP_KERNEL); + if (!adap) + return -ENOMEM; + + /* separate tx buffer allocation for alignment */ + adap->tx_buf = devm_kzalloc(dev, LJCA_MAX_PACKET_SIZE, GFP_KERNEL); + if (!adap->tx_buf) + return -ENOMEM; + adap->tx_buf_len = LJCA_MAX_PACKET_SIZE; + + mutex_init(&adap->mutex); + spin_lock_init(&adap->lock); + init_completion(&adap->cmd_completion); + INIT_LIST_HEAD(&adap->client_list); + + adap->intf = usb_get_intf(interface); + adap->usb_dev = usb_dev; + adap->dev = dev; + + /* + * find the first bulk in and out endpoints. + * ignore any others. 
+ */ + ret = usb_find_common_endpoints(alt, &ep_in, &ep_out, NULL, NULL); + if (ret) { + dev_err(dev, "bulk endpoints not found\n"); + goto err_put; + } + adap->rx_pipe = usb_rcvbulkpipe(usb_dev, usb_endpoint_num(ep_in)); + adap->tx_pipe = usb_sndbulkpipe(usb_dev, usb_endpoint_num(ep_out)); + + /* setup rx buffer */ + adap->rx_len = usb_endpoint_maxp(ep_in); + adap->rx_buf = devm_kzalloc(dev, adap->rx_len, GFP_KERNEL); + if (!adap->rx_buf) { + ret = -ENOMEM; + goto err_put; + } + + /* alloc rx urb */ + adap->rx_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!adap->rx_urb) { + ret = -ENOMEM; + goto err_put; + } + usb_fill_bulk_urb(adap->rx_urb, usb_dev, adap->rx_pipe, + adap->rx_buf, adap->rx_len, ljca_recv, adap); + + usb_set_intfdata(interface, adap); + + /* submit rx urb before enumerate clients */ + ret = usb_submit_urb(adap->rx_urb, GFP_KERNEL); + if (ret) { + dev_err(dev, "submit rx urb failed: %d\n", ret); + goto err_free; + } + + ret = ljca_enumerate_clients(adap); + if (ret) + goto err_free; + + usb_enable_autosuspend(usb_dev); + + return 0; + +err_free: + usb_free_urb(adap->rx_urb); + +err_put: + usb_put_intf(adap->intf); + + mutex_destroy(&adap->mutex); + + return ret; +} + +static void ljca_disconnect(struct usb_interface *interface) +{ + struct ljca_adapter *adap = usb_get_intfdata(interface); + struct ljca_client *client, *next; + + adap->disconnect = true; + + usb_kill_urb(adap->rx_urb); + + list_for_each_entry_safe_reverse(client, next, &adap->client_list, link) { + auxiliary_device_delete(&client->auxdev); + auxiliary_device_uninit(&client->auxdev); + + list_del_init(&client->link); + kfree(client); + } + + usb_free_urb(adap->rx_urb); + + usb_put_intf(adap->intf); + + mutex_destroy(&adap->mutex); +} + +static int ljca_suspend(struct usb_interface *interface, pm_message_t message) +{ + struct ljca_adapter *adap = usb_get_intfdata(interface); + + usb_kill_urb(adap->rx_urb); + + return 0; +} + +static int ljca_resume(struct usb_interface *interface) +{ + 
struct ljca_adapter *adap = usb_get_intfdata(interface); + + return usb_submit_urb(adap->rx_urb, GFP_KERNEL); +} + +static const struct usb_device_id ljca_table[] = { + { USB_DEVICE(0x8086, 0x0b63) }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(usb, ljca_table); + +static struct usb_driver ljca_driver = { + .name = "ljca", + .id_table = ljca_table, + .probe = ljca_probe, + .disconnect = ljca_disconnect, + .suspend = ljca_suspend, + .resume = ljca_resume, + .supports_autosuspend = 1, +}; +module_usb_driver(ljca_driver); + +MODULE_AUTHOR("Wentong Wu "); +MODULE_AUTHOR("Zhifeng Wang "); +MODULE_AUTHOR("Lixu Zhang "); +MODULE_DESCRIPTION("Intel La Jolla Cove Adapter USB driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/usb/ljca.h b/include/linux/usb/ljca.h new file mode 100644 index 000000000000..47661feda96c --- /dev/null +++ b/include/linux/usb/ljca.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023, Intel Corporation. All rights reserved. + */ +#ifndef _LINUX_USB_LJCA_H_ +#define _LINUX_USB_LJCA_H_ + +#include +#include +#include +#include + +#define LJCA_MAX_GPIO_NUM 64 + +#define auxiliary_dev_to_ljca_client(auxiliary_dev) \ + container_of(auxiliary_dev, struct ljca_client, auxdev) + +struct ljca_adapter; + +/** + * typedef ljca_event_cb_t - event callback function signature + * + * @context: the execution context of who registered this callback + * @cmd: the command from device for this event + * @evt_data: the event data payload + * @len: the event data payload length + * + * The callback function is called in interrupt context and the data payload is + * only valid during the call. If the user needs later access of the data, it + * must copy it. 
+ */ +typedef void (*ljca_event_cb_t)(void *context, u8 cmd, const void *evt_data, int len); + +/** + * struct ljca_client - represent a ljca client device + * + * @type: ljca client type + * @id: ljca client id within same client type + * @link: ljca client on the same ljca adapter + * @auxdev: auxiliary device object + * @adapter: ljca adapter the ljca client sit on + * @context: the execution context of the event callback + * @event_cb: ljca client driver register this callback to get + * firmware asynchronous rx buffer pending notifications + * @event_cb_lock: spinlock to protect event callback + */ +struct ljca_client { + u8 type; + u8 id; + struct list_head link; + struct auxiliary_device auxdev; + struct ljca_adapter *adapter; + + void *context; + ljca_event_cb_t event_cb; + /* lock to protect event_cb */ + spinlock_t event_cb_lock; +}; + +/** + * struct ljca_gpio_info - ljca gpio client device info + * + * @num: ljca gpio client device pin number + * @valid_pin_map: ljca gpio client device valid pin mapping + */ +struct ljca_gpio_info { + unsigned int num; + DECLARE_BITMAP(valid_pin_map, LJCA_MAX_GPIO_NUM); +}; + +/** + * struct ljca_i2c_info - ljca i2c client device info + * + * @id: ljca i2c client device identification number + * @capacity: ljca i2c client device capacity + * @intr_pin: ljca i2c client device interrupt pin number if exists + */ +struct ljca_i2c_info { + u8 id; + u8 capacity; + u8 intr_pin; +}; + +/** + * struct ljca_spi_info - ljca spi client device info + * + * @id: ljca spi client device identification number + * @capacity: ljca spi client device capacity + */ +struct ljca_spi_info { + u8 id; + u8 capacity; +}; + +/** + * ljca_register_event_cb - register a callback function to receive events + * + * @client: ljca client device + * @event_cb: callback function + * @context: execution context of event callback + * + * Return: 0 in case of success, negative value in case of error + */ +int ljca_register_event_cb(struct ljca_client 
*client, ljca_event_cb_t event_cb, void *context); + +/** + * ljca_unregister_event_cb - unregister the callback function for an event + * + * @client: ljca client device + */ +void ljca_unregister_event_cb(struct ljca_client *client); + +/** + * ljca_transfer - issue a LJCA command and wait for a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device; it should + * be 0 when obuf is NULL + * @ibuf: any data associated with the response will be copied here; it can be + * NULL if the user doesn't need the response data + * @ibuf_len: must be initialized to the input buffer size + * + * Return: the actual length of response data for success, negative value for errors + */ +int ljca_transfer(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len, u8 *ibuf, u8 ibuf_len); + +/** + * ljca_transfer_noack - issue a LJCA command without a response + * + * @client: ljca client device + * @cmd: the command to be sent to the device + * @obuf: the buffer to be sent to the device; it can be NULL if the user + * doesn't need to transmit data with this command + * @obuf_len: the size of the buffer to be sent to the device + * + * Return: 0 for success, negative value for errors + */ +int ljca_transfer_noack(struct ljca_client *client, u8 cmd, const u8 *obuf, + u8 obuf_len); + +#endif -- cgit v1.2.3 From c4dd854f740c21ae8dd9903fc67969c5497cb14b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 25 Sep 2023 17:28:39 +0100 Subject: cpu-hotplug: Provide prototypes for arch CPU registration Provide common prototypes for arch_register_cpu() and arch_unregister_cpu(). These are called by acpi_processor.c, with weak versions, so the prototype for this is already set. 
It is generally not necessary for function prototypes to be conditional on preprocessor macros. Some architectures (e.g. Loongarch) are missing the prototype for this, and rather than add it to Loongarch's asm/cpu.h, do the job once for everyone. Since this covers everyone, remove the now unnecessary prototypes in asm/cpu.h, and therefore remove the 'static' from one of ia64's arch_register_cpu() definitions. [ tglx: Bring back the ia64 part and remove the ACPI prototypes ] Signed-off-by: Russell King (Oracle) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1qkoRr-0088Q8-Da@rmk-PC.armlinux.org.uk --- arch/ia64/include/asm/cpu.h | 5 ----- arch/ia64/kernel/topology.c | 2 +- arch/x86/include/asm/cpu.h | 2 -- arch/x86/kernel/topology.c | 2 +- drivers/acpi/acpi_processor.c | 1 + include/acpi/processor.h | 5 ----- include/linux/cpu.h | 2 ++ 7 files changed, 5 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h index db125df9e088..642d71675ddb 100644 --- a/arch/ia64/include/asm/cpu.h +++ b/arch/ia64/include/asm/cpu.h @@ -15,9 +15,4 @@ DECLARE_PER_CPU(struct ia64_cpu, cpu_devices); DECLARE_PER_CPU(int, cpu_state); -#ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); -#endif - #endif /* _ASM_IA64_CPU_H_ */ diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 94a848b06f15..741863a187a6 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -59,7 +59,7 @@ void __ref arch_unregister_cpu(int num) } EXPORT_SYMBOL(arch_unregister_cpu); #else -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&sysfs_cpus[num].cpu, num); } diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 3a233ebff712..25050d953eee 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -28,8 +28,6 @@ struct x86_cpu { }; 
#ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); extern void soft_restart_cpu(void); #endif diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index ca004e2e4469..0bab03130033 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -54,7 +54,7 @@ void arch_unregister_cpu(int num) EXPORT_SYMBOL(arch_unregister_cpu); #else /* CONFIG_HOTPLUG_CPU */ -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&per_cpu(cpu_devices, num).cpu, num); } diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index c711db8a9c33..0f5218e361df 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) "ACPI: " fmt #include +#include #include #include #include diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 94181fe9780a..3f34ebb27525 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -465,9 +465,4 @@ extern int acpi_processor_ffh_lpi_probe(unsigned int cpu); extern int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi); #endif -#ifdef CONFIG_ACPI_HOTPLUG_CPU -extern int arch_register_cpu(int cpu); -extern void arch_unregister_cpu(int cpu); -#endif - #endif diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 0abd60a7987b..eb768a866fe3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -80,6 +80,8 @@ extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); +extern int arch_register_cpu(int cpu); +extern void arch_unregister_cpu(int cpu); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); -- cgit v1.2.3 From 5987279373446e97206a7078b2229446ba871ea0 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 5 Oct 2023 19:50:29 -0500 
Subject: iio: event: add optional event label support This adds a new optional field to struct iio_info to allow drivers to specify a label for the event. This is useful for cases where there are many events or the event attribute name is not descriptive enough or where an event doesn't have any other attributes. The implementation is based on the existing label support for channels. So either all events of a device have a label attribute or none do. Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20231005-ad2s1210-mainline-v4-12-ec00746840fc@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-event.c | 55 ++++++++++++++++++++++++++++++++++++++++ include/linux/iio/iio.h | 8 ++++++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index 19f7a91157ee..910c1f14abd5 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -355,6 +355,21 @@ static ssize_t iio_ev_value_store(struct device *dev, return len; } +static ssize_t iio_ev_label_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct iio_dev *indio_dev = dev_to_iio_dev(dev); + struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); + + if (indio_dev->info->read_event_label) + return indio_dev->info->read_event_label(indio_dev, + this_attr->c, iio_ev_attr_type(this_attr), + iio_ev_attr_dir(this_attr), buf); + + return -EINVAL; +} + static int iio_device_add_event(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, unsigned int spec_index, enum iio_event_type type, enum iio_event_direction dir, @@ -411,6 +426,41 @@ static int iio_device_add_event(struct iio_dev *indio_dev, return attrcount; } +static int iio_device_add_event_label(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, + unsigned int spec_index, + enum iio_event_type type, + enum iio_event_direction dir) +{ + struct iio_dev_opaque *iio_dev_opaque = 
to_iio_dev_opaque(indio_dev); + char *postfix; + int ret; + + if (!indio_dev->info->read_event_label) + return 0; + + if (dir != IIO_EV_DIR_NONE) + postfix = kasprintf(GFP_KERNEL, "%s_%s_label", + iio_ev_type_text[type], + iio_ev_dir_text[dir]); + else + postfix = kasprintf(GFP_KERNEL, "%s_label", + iio_ev_type_text[type]); + if (postfix == NULL) + return -ENOMEM; + + ret = __iio_add_chan_devattr(postfix, chan, &iio_ev_label_show, NULL, + spec_index, IIO_SEPARATE, &indio_dev->dev, NULL, + &iio_dev_opaque->event_interface->dev_attr_list); + + kfree(postfix); + + if (ret < 0) + return ret; + + return 1; +} + static int iio_device_add_event_sysfs(struct iio_dev *indio_dev, struct iio_chan_spec const *chan) { @@ -448,6 +498,11 @@ static int iio_device_add_event_sysfs(struct iio_dev *indio_dev, if (ret < 0) return ret; attrcount += ret; + + ret = iio_device_add_event_label(indio_dev, chan, i, type, dir); + if (ret < 0) + return ret; + attrcount += ret; } ret = attrcount; return ret; diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 7bfa1b9bc8a2..d0ce3b71106a 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -427,6 +427,8 @@ struct iio_trigger; /* forward declaration */ * @write_event_config: set if the event is enabled. * @read_event_value: read a configuration value associated with the event. * @write_event_value: write a configuration value for the event. + * @read_event_label: function to request label name for a specified label, + * for better event identification. * @validate_trigger: function to validate the trigger when the * current trigger gets changed. 
* @update_scan_mode: function to configure device and scan buffer when @@ -511,6 +513,12 @@ struct iio_info { enum iio_event_direction dir, enum iio_event_info info, int val, int val2); + int (*read_event_label)(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + enum iio_event_type type, + enum iio_event_direction dir, + char *label); + int (*validate_trigger)(struct iio_dev *indio_dev, struct iio_trigger *trig); int (*update_scan_mode)(struct iio_dev *indio_dev, -- cgit v1.2.3 From 21ca59b365c091d583f36ac753eaa8baf947be6f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 28 Oct 2021 12:31:14 +0200 Subject: binfmt_misc: enable sandboxed mounts Enable unprivileged sandboxes to create their own binfmt_misc mounts. This is based on Laurent's work in [1] but has been significantly reworked to fix various issues we identified in earlier versions. While binfmt_misc can currently only be mounted in the initial user namespace, binary types registered in this binfmt_misc instance are available to all sandboxes (Either by having them installed in the sandbox or by registering the binary type with the F flag causing the interpreter to be opened right away). So binfmt_misc binary types are already delegated to sandboxes implicitly. However, while a sandbox has access to all registered binary types in binfmt_misc a sandbox cannot currently register its own binary types in binfmt_misc. This has prevented various use-cases some of which were already outlined in [1] but we have a range of issues associated with this (cf. [3]-[5] below which are just a small sample). Extend binfmt_misc to be mountable in non-initial user namespaces. Similar to other filesystem such as nfsd, mqueue, and sunrpc we use keyed superblock management. The key determines whether we need to create a new superblock or can reuse an already existing one. We use the user namespace of the mount as key. This means a new binfmt_misc superblock is created once per user namespace creation. 
Subsequent mounts of binfmt_misc in the same user namespace will mount the same binfmt_misc instance. We explicitly do not create a new binfmt_misc superblock on every binfmt_misc mount as the semantics for load_misc_binary() line up with the keying model. This also allows us to retrieve the relevant binfmt_misc instance based on the caller's user namespace which can be done in a simple (bounded to 32 levels) loop. Similar to the current binfmt_misc semantics allowing access to the binary types in the initial binfmt_misc instance we do allow sandboxes access to their parent's binfmt_misc mounts if they have not created a separate binfmt_misc instance. Overall, this will unblock the use-cases mentioned below and in general will also allow to support and harden execution of another architecture's binaries in tight sandboxes. For instance, using the unshare binary it is possible to start a chroot of another architecture and configure the binfmt_misc interpreter without being root to run the binaries in this chroot and without requiring the host to modify its binary type handlers. Henning had already posted a few experiments in the cover letter at [1].
But here's an additional example where an unprivileged container registers qemu-user-static binary handlers for various binary types in its separate binfmt_misc mount and is then seamlessly able to start containers with a different architecture without affecting the host: root [lxc monitor] /var/snap/lxd/common/lxd/containers f1 1000000 \_ /sbin/init 1000000 \_ /lib/systemd/systemd-journald 1000000 \_ /lib/systemd/systemd-udevd 1000100 \_ /lib/systemd/systemd-networkd 1000101 \_ /lib/systemd/systemd-resolved 1000000 \_ /usr/sbin/cron -f 1000103 \_ /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation --syslog-only 1000000 \_ /usr/bin/python3 /usr/bin/networkd-dispatcher --run-startup-triggers 1000104 \_ /usr/sbin/rsyslogd -n -iNONE 1000000 \_ /lib/systemd/systemd-logind 1000000 \_ /sbin/agetty -o -p -- \u --noclear --keep-baud console 115200,38400,9600 vt220 1000107 \_ dnsmasq --conf-file=/dev/null -u lxc-dnsmasq --strict-order --bind-interfaces --pid-file=/run/lxc/dnsmasq.pid --liste 1000000 \_ [lxc monitor] /var/lib/lxc f1-s390x 1100000 \_ /usr/bin/qemu-s390x-static /sbin/init 1100000 \_ /usr/bin/qemu-s390x-static /lib/systemd/systemd-journald 1100000 \_ /usr/bin/qemu-s390x-static /usr/sbin/cron -f 1100103 \_ /usr/bin/qemu-s390x-static /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-ac 1100000 \_ /usr/bin/qemu-s390x-static /usr/bin/python3 /usr/bin/networkd-dispatcher --run-startup-triggers 1100104 \_ /usr/bin/qemu-s390x-static /usr/sbin/rsyslogd -n -iNONE 1100000 \_ /usr/bin/qemu-s390x-static /lib/systemd/systemd-logind 1100000 \_ /usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud console 115200,38400,9600 vt220 1100000 \_ /usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud pts/0 115200,38400,9600 vt220 1100000 \_ /usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud pts/1 115200,38400,9600 vt220 1100000 \_ 
/usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud pts/2 115200,38400,9600 vt220 1100000 \_ /usr/bin/qemu-s390x-static /sbin/agetty -o -p -- \u --noclear --keep-baud pts/3 115200,38400,9600 vt220 1100000 \_ /usr/bin/qemu-s390x-static /lib/systemd/systemd-udevd [1]: https://lore.kernel.org/all/20191216091220.465626-1-laurent@vivier.eu [2]: https://discuss.linuxcontainers.org/t/binfmt-misc-permission-denied [3]: https://discuss.linuxcontainers.org/t/lxd-binfmt-support-for-qemu-static-interpreters [4]: https://discuss.linuxcontainers.org/t/3-1-0-binfmt-support-service-in-unprivileged-guest-requires-write-access-on-hosts-proc-sys-fs-binfmt-misc [5]: https://discuss.linuxcontainers.org/t/qemu-user-static-not-working-4-11 Link: https://lore.kernel.org/r/20191216091220.465626-2-laurent@vivier.eu (origin) Link: https://lore.kernel.org/r/20211028103114.2849140-2-brauner@kernel.org (v1) Cc: Sargun Dhillon Cc: Serge Hallyn Cc: Jann Horn Cc: Henning Schild Cc: Andrei Vagin Cc: Al Viro Cc: Laurent Vivier Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Laurent Vivier Signed-off-by: Christian Brauner Signed-off-by: Christian Brauner Signed-off-by: Kees Cook --- /* v2 */ - Serge Hallyn : - Use GFP_KERNEL_ACCOUNT for userspace triggered allocations when a new binary type handler is registered. - Christian Brauner : - Switch authorship to me. I refused to do that earlier even though Laurent said I should do so because I think it's genuinely bad form. But by now I have changed so many things that it'd be unfair to blame Laurent for any potential bugs in here. - Add more comments that explain what's going on. - Rename functions while changing them to better reflect what they are doing to make the code easier to understand. 
- In the first version when a specific binary type handler was removed either through a write to the entry's file or all binary type handlers were removed by a write to the binfmt_misc mount's status file all cleanup work happened during inode eviction. That includes removal of the relevant entries from the entry list. While that works fine I disliked that model after thinking about it for a bit. Because it means that there was a window where someone has already removed one or all binary handlers but they could still be safely reached from load_misc_binary() when it has managed to take the read_lock() on the entries list while inode eviction was already happening. Again, that's perfectly benign but it's cleaner to remove the binary handler from the list immediately meaning that once the write to the entry's file or the binfmt_misc status file returns the binary type cannot be executed anymore. That gives stronger guarantees to the user. --- fs/binfmt_misc.c | 202 ++++++++++++++++++++++++++++++++++------- include/linux/binfmts.h | 10 ++ include/linux/user_namespace.h | 8 ++ kernel/user.c | 13 +++ kernel/user_namespace.c | 3 + 5 files changed, 202 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index cf5ed5cd4102..deacc105119d 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -40,9 +40,6 @@ enum { VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */ }; -static LIST_HEAD(entries); -static int enabled = 1; - enum {Enabled, Magic}; #define MISC_FMT_PRESERVE_ARGV0 (1UL << 31) #define MISC_FMT_OPEN_BINARY (1UL << 30) @@ -63,7 +60,6 @@ typedef struct { refcount_t users; /* sync removal with load_misc_binary() */ } Node; -static DEFINE_RWLOCK(entries_lock); static struct file_system_type bm_fs_type; /* @@ -91,13 +87,14 @@ static struct file_system_type bm_fs_type; * * Return: binary type list entry on success, NULL on failure */ -static Node *search_binfmt_handler(struct linux_binprm *bprm) +static
Node *search_binfmt_handler(struct binfmt_misc *misc, + struct linux_binprm *bprm) { char *p = strrchr(bprm->interp, '.'); Node *e; /* Walk all the registered handlers. */ - list_for_each_entry(e, &entries, list) { + list_for_each_entry(e, &misc->entries, list) { char *s; int j; @@ -140,15 +137,16 @@ static Node *search_binfmt_handler(struct linux_binprm *bprm) * * Return: binary type list entry on success, NULL on failure */ -static Node *get_binfmt_handler(struct linux_binprm *bprm) +static Node *get_binfmt_handler(struct binfmt_misc *misc, + struct linux_binprm *bprm) { Node *e; - read_lock(&entries_lock); - e = search_binfmt_handler(bprm); + read_lock(&misc->entries_lock); + e = search_binfmt_handler(misc, bprm); if (e) refcount_inc(&e->users); - read_unlock(&entries_lock); + read_unlock(&misc->entries_lock); return e; } @@ -169,6 +167,35 @@ static void put_binfmt_handler(Node *e) } } +/** + * load_binfmt_misc - load the binfmt_misc of the caller's user namespace + * + * To be called in load_misc_binary() to load the relevant struct binfmt_misc. + * If a user namespace doesn't have its own binfmt_misc mount it can make use + * of its ancestor's binfmt_misc handlers. This mimicks the behavior of + * pre-namespaced binfmt_misc where all registered binfmt_misc handlers where + * available to all user and user namespaces on the system. + * + * Return: the binfmt_misc instance of the caller's user namespace + */ +static struct binfmt_misc *load_binfmt_misc(void) +{ + const struct user_namespace *user_ns; + struct binfmt_misc *misc; + + user_ns = current_user_ns(); + while (user_ns) { + /* Pairs with smp_store_release() in bm_fill_super(). 
*/ + misc = smp_load_acquire(&user_ns->binfmt_misc); + if (misc) + return misc; + + user_ns = user_ns->parent; + } + + return &init_binfmt_misc; +} + /* * the loader itself */ @@ -176,13 +203,14 @@ static int load_misc_binary(struct linux_binprm *bprm) { Node *fmt; struct file *interp_file = NULL; - int retval; + int retval = -ENOEXEC; + struct binfmt_misc *misc; - retval = -ENOEXEC; - if (!enabled) + misc = load_binfmt_misc(); + if (!misc->enabled) return retval; - fmt = get_binfmt_handler(bprm); + fmt = get_binfmt_handler(misc, bprm); if (!fmt) return retval; @@ -240,9 +268,9 @@ ret: /* * If we actually put the node here all concurrent calls to * load_misc_binary() will have finished. We also know - * that for the refcount to be zero ->evict_inode() must have removed - * the node to be deleted from the list. All that is left for us is to - * close and free. + * that for the refcount to be zero someone must have concurently + * removed the binary type handler from the list and it's our job to + * free it. */ put_binfmt_handler(fmt); @@ -334,7 +362,7 @@ static Node *create_entry(const char __user *buffer, size_t count) err = -ENOMEM; memsize = sizeof(Node) + count + 8; - e = kmalloc(memsize, GFP_KERNEL); + e = kmalloc(memsize, GFP_KERNEL_ACCOUNT); if (!e) goto out; @@ -446,7 +474,7 @@ static Node *create_entry(const char __user *buffer, size_t count) if (e->mask) { int i; - char *masked = kmalloc(e->size, GFP_KERNEL); + char *masked = kmalloc(e->size, GFP_KERNEL_ACCOUNT); print_hex_dump_bytes( KBUILD_MODNAME ": register: mask[decoded]: ", @@ -599,6 +627,22 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) return inode; } +/** + * i_binfmt_misc - retrieve struct binfmt_misc from a binfmt_misc inode + * @inode: inode of the relevant binfmt_misc instance + * + * This helper retrieves struct binfmt_misc from a binfmt_misc inode. 
This can + * be done without any memory barriers because we are guaranteed that + * user_ns->binfmt_misc is fully initialized. It was fully initialized when the + * binfmt_misc mount was first created. + * + * Return: struct binfmt_misc of the relevant binfmt_misc instance + */ +static struct binfmt_misc *i_binfmt_misc(struct inode *inode) +{ + return inode->i_sb->s_user_ns->binfmt_misc; +} + /** * bm_evict_inode - cleanup data associated with @inode * @inode: inode to which the data is attached @@ -619,10 +663,13 @@ static void bm_evict_inode(struct inode *inode) clear_inode(inode); if (e) { - write_lock(&entries_lock); + struct binfmt_misc *misc; + + misc = i_binfmt_misc(inode); + write_lock(&misc->entries_lock); if (!list_empty(&e->list)) list_del_init(&e->list); - write_unlock(&entries_lock); + write_unlock(&misc->entries_lock); put_binfmt_handler(e); } } @@ -677,11 +724,11 @@ static void unlink_binfmt_dentry(struct dentry *dentry) * to use writes to files in order to delete binary type handlers. But it has * worked for so long that it's not a pressing issue. */ -static void remove_binfmt_handler(Node *e) +static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e) { - write_lock(&entries_lock); + write_lock(&misc->entries_lock); list_del_init(&e->list); - write_unlock(&entries_lock); + write_unlock(&misc->entries_lock); unlink_binfmt_dentry(e->dentry); } @@ -737,7 +784,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, * actually remove the entry from the list. 
*/ if (!list_empty(&e->list)) - remove_binfmt_handler(e); + remove_binfmt_handler(i_binfmt_misc(inode), e); inode_unlock(inode); break; @@ -763,6 +810,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, struct inode *inode; struct super_block *sb = file_inode(file)->i_sb; struct dentry *root = sb->s_root, *dentry; + struct binfmt_misc *misc; int err = 0; struct file *f = NULL; @@ -772,7 +820,18 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, return PTR_ERR(e); if (e->flags & MISC_FMT_OPEN_FILE) { + const struct cred *old_cred; + + /* + * Now that we support unprivileged binfmt_misc mounts make + * sure we use the credentials that the register @file was + * opened with to also open the interpreter. Before that this + * didn't matter much as only a privileged process could open + * the register file. + */ + old_cred = override_creds(file->f_cred); f = open_exec(e->interpreter); + revert_creds(old_cred); if (IS_ERR(f)) { pr_notice("register: failed to install interpreter file %s\n", e->interpreter); @@ -804,9 +863,10 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, inode->i_fop = &bm_entry_operations; d_instantiate(dentry, inode); - write_lock(&entries_lock); - list_add(&e->list, &entries); - write_unlock(&entries_lock); + misc = i_binfmt_misc(inode); + write_lock(&misc->entries_lock); + list_add(&e->list, &misc->entries); + write_unlock(&misc->entries_lock); err = 0; out2: @@ -833,26 +893,31 @@ static const struct file_operations bm_register_operations = { static ssize_t bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - char *s = enabled ? "enabled\n" : "disabled\n"; + struct binfmt_misc *misc; + char *s; + misc = i_binfmt_misc(file_inode(file)); + s = misc->enabled ? 
"enabled\n" : "disabled\n"; return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s)); } static ssize_t bm_status_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { + struct binfmt_misc *misc; int res = parse_command(buffer, count); Node *e, *next; struct inode *inode; + misc = i_binfmt_misc(file_inode(file)); switch (res) { case 1: /* Disable all handlers. */ - enabled = 0; + misc->enabled = false; break; case 2: /* Enable all handlers. */ - enabled = 1; + misc->enabled = true; break; case 3: /* Delete all handlers. */ @@ -868,8 +933,8 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer, * read-only. So we only need to take the write lock when we * actually remove the entry from the list. */ - list_for_each_entry_safe(e, next, &entries, list) - remove_binfmt_handler(e); + list_for_each_entry_safe(e, next, &misc->entries, list) + remove_binfmt_handler(misc, e); inode_unlock(inode); break; @@ -888,32 +953,100 @@ static const struct file_operations bm_status_operations = { /* Superblock handling */ +static void bm_put_super(struct super_block *sb) +{ + struct user_namespace *user_ns = sb->s_fs_info; + + sb->s_fs_info = NULL; + put_user_ns(user_ns); +} + static const struct super_operations s_ops = { .statfs = simple_statfs, .evict_inode = bm_evict_inode, + .put_super = bm_put_super, }; static int bm_fill_super(struct super_block *sb, struct fs_context *fc) { int err; + struct user_namespace *user_ns = sb->s_user_ns; + struct binfmt_misc *misc; static const struct tree_descr bm_files[] = { [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, [3] = {"register", &bm_register_operations, S_IWUSR}, /* last one */ {""} }; + if (WARN_ON(user_ns != current_user_ns())) + return -EINVAL; + + /* + * Lazily allocate a new binfmt_misc instance for this namespace, i.e. + * do it here during the first mount of binfmt_misc. We don't need to + * waste memory for every user namespace allocation. 
It's likely much + * more common to not mount a separate binfmt_misc instance than it is + * to mount one. + * + * While multiple superblocks can exist they are keyed by userns in + * s_fs_info for binfmt_misc. Hence, the vfs guarantees that + * bm_fill_super() is called exactly once whenever a binfmt_misc + * superblock for a userns is created. This in turn lets us conclude + * that when a binfmt_misc superblock is created for the first time for + * a userns there's no one racing us. Therefore we don't need any + * barriers when we dereference binfmt_misc. + */ + misc = user_ns->binfmt_misc; + if (!misc) { + /* + * If it turns out that most user namespaces actually want to + * register their own binary type handler and therefore all + * create their own separate binfm_misc mounts we should + * consider turning this into a kmem cache. + */ + misc = kzalloc(sizeof(struct binfmt_misc), GFP_KERNEL); + if (!misc) + return -ENOMEM; + + INIT_LIST_HEAD(&misc->entries); + rwlock_init(&misc->entries_lock); + + /* Pairs with smp_load_acquire() in load_binfmt_misc(). */ + smp_store_release(&user_ns->binfmt_misc, misc); + } + + /* + * When the binfmt_misc superblock for this userns is shutdown + * ->enabled might have been set to false and we don't reinitialize + * ->enabled again in put_super() as someone might already be mounting + * binfmt_misc again. It also would be pointless since by the time + * ->put_super() is called we know that the binary type list for this + * bintfmt_misc mount is empty making load_misc_binary() return + * -ENOEXEC independent of whether ->enabled is true. Instead, if + * someone mounts binfmt_misc for the first time or again we simply + * reset ->enabled to true. 
+ */ + misc->enabled = true; + err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files); if (!err) sb->s_op = &s_ops; return err; } +static void bm_free(struct fs_context *fc) +{ + if (fc->s_fs_info) + put_user_ns(fc->s_fs_info); +} + static int bm_get_tree(struct fs_context *fc) { - return get_tree_single(fc, bm_fill_super); + return get_tree_keyed(fc, bm_fill_super, get_user_ns(fc->user_ns)); } static const struct fs_context_operations bm_context_ops = { + .free = bm_free, .get_tree = bm_get_tree, }; @@ -932,6 +1065,7 @@ static struct file_system_type bm_fs_type = { .owner = THIS_MODULE, .name = "binfmt_misc", .init_fs_context = bm_init_fs_context, + .fs_flags = FS_USERNS_MOUNT, .kill_sb = kill_litter_super, }; MODULE_ALIAS_FS("binfmt_misc"); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 8d51f69f9f5e..70f97f685bff 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -90,6 +90,16 @@ struct linux_binfmt { #endif } __randomize_layout; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc { + struct list_head entries; + rwlock_t entries_lock; + bool enabled; +} __randomize_layout; + +extern struct binfmt_misc init_binfmt_misc; +#endif + extern void __register_binfmt(struct linux_binfmt *fmt, int insert); /* Registration of default binfmt handlers */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..6030a8235617 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -65,6 +65,10 @@ enum rlimit_type { UCOUNT_RLIMIT_COUNTS, }; +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc; +#endif + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -102,6 +106,10 @@ struct user_namespace { struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; + +#if IS_ENABLED(CONFIG_BINFMT_MISC) + struct binfmt_misc *binfmt_misc; +#endif } __randomize_layout; struct ucounts { diff --git a/kernel/user.c 
b/kernel/user.c index d667debeafd6..03cedc366dc9 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -18,8 +18,18 @@ #include #include #include +#include #include +#if IS_ENABLED(CONFIG_BINFMT_MISC) +struct binfmt_misc init_binfmt_misc = { + .entries = LIST_HEAD_INIT(init_binfmt_misc.entries), + .enabled = true, + .entries_lock = __RW_LOCK_UNLOCKED(init_binfmt_misc.entries_lock), +}; +EXPORT_SYMBOL_GPL(init_binfmt_misc); +#endif + /* * userns count is 1 for root user, 1 for init_uts_ns, * and 1 for... ? @@ -67,6 +77,9 @@ struct user_namespace init_user_ns = { .keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list), .keyring_sem = __RWSEM_INITIALIZER(init_user_ns.keyring_sem), #endif +#if IS_ENABLED(CONFIG_BINFMT_MISC) + .binfmt_misc = &init_binfmt_misc, +#endif }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 1d8e47bed3f1..d52a894ecf57 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -213,6 +213,9 @@ static void free_user_ns(struct work_struct *work) kfree(ns->projid_map.forward); kfree(ns->projid_map.reverse); } +#if IS_ENABLED(CONFIG_BINFMT_MISC) + kfree(ns->binfmt_misc); +#endif retire_userns_sysctls(ns); key_free_user_ns(ns); ns_free_inum(&ns->ns); -- cgit v1.2.3 From f6d7f050e258e3c71e310f5167c4d65bbefaeb31 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 10 Oct 2023 19:31:00 +0300 Subject: spi: Don't use flexible array in struct spi_message definition The struct spi_message can be embedded into other structures. With that the flexible array might be problematic as sparse complains about it, although there is no real issue in the code because when the message is embedded it doesn't use the flexible array member. That member is private to spi_message_alloc() API, so move it to that API in a form of an inherited data type.
Reported-by: Marc Kleine-Budde Fixes: 75e308ffc4f0 ("spi: Use struct_size() helper")) Closes: https://lore.kernel.org/r/20231009-onshore-underage-c58415adfd92-mkl@pengutronix.de Signed-off-by: Andy Shevchenko Reviewed-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20231010163100.89734-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7f8b478fdeb3..487da1f6e4b7 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1086,8 +1086,6 @@ struct spi_transfer { * @state: for use by whichever driver currently owns the message * @resources: for resource management when the SPI message is processed * @prepared: spi_prepare_message was called for the this message - * @t: for use with spi_message_alloc() when message and transfers have - * been allocated together * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. 
The sequence is "atomic" @@ -1142,9 +1140,6 @@ struct spi_message { /* List of spi_res resources when the SPI message is processed */ struct list_head resources; - - /* For embedding transfers into the memory of the message */ - struct spi_transfer t[]; }; static inline void spi_message_init_no_memset(struct spi_message *m) @@ -1203,17 +1198,21 @@ struct spi_transfer *xfers, unsigned int num_xfers) */ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags) { - struct spi_message *m; + struct spi_message_with_transfers { + struct spi_message m; + struct spi_transfer t[]; + } *mwt; + unsigned i; + + mwt = kzalloc(struct_size(mwt, t, ntrans), flags); + if (!mwt) + return NULL; - m = kzalloc(struct_size(m, t, ntrans), flags); - if (m) { - unsigned i; + spi_message_init_no_memset(&mwt->m); + for (i = 0; i < ntrans; i++) + spi_message_add_tail(&mwt->t[i], &mwt->m); - spi_message_init_no_memset(m); - for (i = 0; i < ntrans; i++) - spi_message_add_tail(&m->t[i], m); - } - return m; + return &mwt->m; } static inline void spi_message_free(struct spi_message *m) -- cgit v1.2.3 From 9d77eb52778499a97cab662aa96de4e2e4fa72d3 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:08 +0200 Subject: nvme-keyring: register '.nvme' keyring Register a '.nvme' keyring to hold keys for TLS and DH-HMAC-CHAP and add a new config option NVME_KEYRING. We need a separate keyring for NVMe as the configuration is done via individual commands (eg for configfs), and the usual per-session or per-process keyrings can't be used. 
Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/Kconfig | 4 ++++ drivers/nvme/common/Makefile | 3 ++- drivers/nvme/common/keyring.c | 40 ++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/core.c | 10 ++++++++-- include/linux/nvme-keyring.h | 28 ++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 drivers/nvme/common/keyring.c create mode 100644 include/linux/nvme-keyring.h (limited to 'include/linux') diff --git a/drivers/nvme/common/Kconfig b/drivers/nvme/common/Kconfig index 4514f44362dd..641b27adb047 100644 --- a/drivers/nvme/common/Kconfig +++ b/drivers/nvme/common/Kconfig @@ -2,3 +2,7 @@ config NVME_COMMON tristate + +config NVME_KEYRING + bool + select KEYS diff --git a/drivers/nvme/common/Makefile b/drivers/nvme/common/Makefile index 720c625b8a52..0cbd0b0b8d49 100644 --- a/drivers/nvme/common/Makefile +++ b/drivers/nvme/common/Makefile @@ -4,4 +4,5 @@ ccflags-y += -I$(src) obj-$(CONFIG_NVME_COMMON) += nvme-common.o -nvme-common-y += auth.o +nvme-common-$(CONFIG_NVME_AUTH) += auth.o +nvme-common-$(CONFIG_NVME_KEYRING) += keyring.o diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c new file mode 100644 index 000000000000..5cf64b278119 --- /dev/null +++ b/drivers/nvme/common/keyring.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 Hannes Reinecke, SUSE Labs + */ + +#include +#include +#include +#include +#include + +static struct key *nvme_keyring; + +key_serial_t nvme_keyring_id(void) +{ + return nvme_keyring->serial; +} +EXPORT_SYMBOL_GPL(nvme_keyring_id); + +int nvme_keyring_init(void) +{ + nvme_keyring = keyring_alloc(".nvme", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + (KEY_USR_ALL & ~KEY_USR_SETATTR), + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + if (IS_ERR(nvme_keyring)) + return PTR_ERR(nvme_keyring); + + return 0; +} 
+EXPORT_SYMBOL_GPL(nvme_keyring_init); + +void nvme_keyring_exit(void) +{ + key_revoke(nvme_keyring); + key_put(nvme_keyring); +} +EXPORT_SYMBOL_GPL(nvme_keyring_exit); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 21783aa2ee8e..a49b65d34cda 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -25,6 +25,7 @@ #include "nvme.h" #include "fabrics.h" #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -4723,12 +4724,16 @@ static int __init nvme_core_init(void) result = PTR_ERR(nvme_ns_chr_class); goto unregister_generic_ns; } - - result = nvme_init_auth(); + result = nvme_keyring_init(); if (result) goto destroy_ns_chr; + result = nvme_init_auth(); + if (result) + goto keyring_exit; return 0; +keyring_exit: + nvme_keyring_exit(); destroy_ns_chr: class_destroy(nvme_ns_chr_class); unregister_generic_ns: @@ -4752,6 +4757,7 @@ out: static void __exit nvme_core_exit(void) { nvme_exit_auth(); + nvme_keyring_exit(); class_destroy(nvme_ns_chr_class); class_destroy(nvme_subsys_class); class_destroy(nvme_class); diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h new file mode 100644 index 000000000000..32bd264a71e6 --- /dev/null +++ b/include/linux/nvme-keyring.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Hannes Reinecke, SUSE Labs + */ + +#ifndef _NVME_KEYRING_H +#define _NVME_KEYRING_H + +#ifdef CONFIG_NVME_KEYRING + +key_serial_t nvme_keyring_id(void); +int nvme_keyring_init(void); +void nvme_keyring_exit(void); + +#else + +static inline key_serial_t nvme_keyring_id(void) +{ + return 0; +} +static inline int nvme_keyring_init(void) +{ + return 0; +} +static inline void nvme_keyring_exit(void) {} + +#endif /* !CONFIG_NVME_KEYRING */ +#endif /* _NVME_KEYRING_H */ -- cgit v1.2.3 From 646f45b23218c7a97a84259d8eeb22dad5711fc8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:10 +0200 Subject: nvme: add TCP TSAS definitions 
Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26dd3f859d9d..a7ba74babad7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -108,6 +108,13 @@ enum { NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ }; +/* TSAS SECTYPE for TCP transport */ +enum { + NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ + NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ + NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ +}; + #define NVME_AQ_DEPTH 32 #define NVME_NR_AEN_COMMANDS 1 #define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) @@ -1493,6 +1500,9 @@ struct nvmf_disc_rsp_page_entry { __u16 pkey; __u8 resv10[246]; } rdma; + struct tcp { + __u8 sectype; + } tcp; } tsas; }; -- cgit v1.2.3 From a86062aac34d100a3117c0fff91ee1892ebfb460 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:11 +0200 Subject: nvme-tcp: add definitions for TLS cipher suites Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme-tcp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 57ebe1267f7f..e07e8978d691 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -18,6 +18,12 @@ enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, }; +enum nvme_tcp_tls_cipher { + NVME_TCP_TLS_CIPHER_INVALID = 0, + NVME_TCP_TLS_CIPHER_SHA256 = 1, + NVME_TCP_TLS_CIPHER_SHA384 = 2, +}; + enum nvme_tcp_fatal_error_status { NVME_TCP_FES_INVALID_PDU_HDR = 0x01, NVME_TCP_FES_PDU_SEQ_ERR = 0x02, -- cgit v1.2.3 From 501cc6f4aca8dc0958c4d9716f0233ba7cff4830 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:12 +0200 Subject: nvme-keyring: 
implement nvme_tls_psk_default() Implement a function to select the preferred PSK for TLS. A 'retained' PSK should be preferred over a 'generated' PSK, and SHA-384 should be preferred to SHA-256. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/keyring.c | 48 +++++++++++++++++++++++++++++++++++++++++++ include/linux/nvme-keyring.h | 8 ++++++++ 2 files changed, 56 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index 494dd365052e..f8d9a208397b 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -103,6 +104,53 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring, return key_ref_to_ptr(keyref); } +/* + * NVMe PSK priority list + * + * 'Retained' PSKs (ie 'generated == false') + * should be preferred to 'generated' PSKs, + * and SHA-384 should be preferred to SHA-256. 
+ */ +struct nvme_tls_psk_priority_list { + bool generated; + enum nvme_tcp_tls_cipher cipher; +} nvme_tls_psk_prio[] = { + { .generated = false, + .cipher = NVME_TCP_TLS_CIPHER_SHA384, }, + { .generated = false, + .cipher = NVME_TCP_TLS_CIPHER_SHA256, }, + { .generated = true, + .cipher = NVME_TCP_TLS_CIPHER_SHA384, }, + { .generated = true, + .cipher = NVME_TCP_TLS_CIPHER_SHA256, }, +}; + +/* + * nvme_tls_psk_default - Return the preferred PSK to use for TLS ClientHello + */ +key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn) +{ + struct key *tls_key; + key_serial_t tls_key_id; + int prio; + + for (prio = 0; prio < ARRAY_SIZE(nvme_tls_psk_prio); prio++) { + bool generated = nvme_tls_psk_prio[prio].generated; + enum nvme_tcp_tls_cipher cipher = nvme_tls_psk_prio[prio].cipher; + + tls_key = nvme_tls_psk_lookup(keyring, hostnqn, subnqn, + cipher, generated); + if (!IS_ERR(tls_key)) { + tls_key_id = tls_key->serial; + key_put(tls_key); + return tls_key_id; + } + } + return 0; +} +EXPORT_SYMBOL_GPL(nvme_tls_psk_default); + int nvme_keyring_init(void) { int err; diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 32bd264a71e6..4efea9dd967c 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -8,12 +8,20 @@ #ifdef CONFIG_NVME_KEYRING +key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn); + key_serial_t nvme_keyring_id(void); int nvme_keyring_init(void); void nvme_keyring_exit(void); #else +static inline key_serial_t nvme_tls_psk_default(struct key *keyring, + const char *hostnqn, const char *subnqn) +{ + return 0; +} static inline key_serial_t nvme_keyring_id(void) { return 0; -- cgit v1.2.3 From 037c34318a479294cdb98dc8018edd5d191b68c0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Aug 2023 16:39:13 +0200 Subject: security/keys: export key_lookup() For in-kernel consumers one cannot readily assign a user (eg 
when running from a workqueue), so the normal key search permissions cannot be applied. This patch exports the 'key_lookup()' function for a simple lookup of keys without checking for permissions. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Acked-by: David Howells Signed-off-by: Keith Busch --- include/linux/key.h | 1 + security/keys/key.c | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 938d7ecfb495..943a432da3ae 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -515,6 +515,7 @@ extern void key_init(void); #define key_init() do { } while(0) #define key_free_user_ns(ns) do { } while(0) #define key_remove_domain(d) do { } while(0) +#define key_lookup(k) NULL #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ diff --git a/security/keys/key.c b/security/keys/key.c index 5c0c7df833f8..0260a1902922 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -693,6 +693,7 @@ error: spin_unlock(&key_serial_lock); return key; } +EXPORT_SYMBOL(key_lookup); /* * Find and lock the specified key type against removal. -- cgit v1.2.3 From 39c6eed1f61594f737160e498d29673edbd9eefd Mon Sep 17 00:00:00 2001 From: Maciej Wieczor-Retman Date: Tue, 10 Oct 2023 12:42:36 +0200 Subject: x86/resctrl: Rename arch_has_sparse_bitmaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename arch_has_sparse_bitmaps to arch_has_sparse_bitmasks to ensure consistent terminology throughout resctrl. 
Suggested-by: Reinette Chatre Signed-off-by: Maciej Wieczor-Retman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Ilpo Järvinen Reviewed-by: Peter Newman Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Tested-by: Peter Newman Link: https://lore.kernel.org/r/e330fcdae873ef1a831e707025a4b70fa346666e.1696934091.git.maciej.wieczor-retman@intel.com --- arch/x86/kernel/cpu/resctrl/core.c | 4 ++-- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 ++-- include/linux/resctrl.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 030d3b409768..c09e4fdded3c 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -872,7 +872,7 @@ static __init void rdt_init_res_defs_intel(void) if (r->rid == RDT_RESOURCE_L3 || r->rid == RDT_RESOURCE_L2) { - r->cache.arch_has_sparse_bitmaps = false; + r->cache.arch_has_sparse_bitmasks = false; r->cache.arch_has_per_cpu_cfg = false; r->cache.min_cbm_bits = 1; } else if (r->rid == RDT_RESOURCE_MBA) { @@ -892,7 +892,7 @@ static __init void rdt_init_res_defs_amd(void) if (r->rid == RDT_RESOURCE_L3 || r->rid == RDT_RESOURCE_L2) { - r->cache.arch_has_sparse_bitmaps = true; + r->cache.arch_has_sparse_bitmasks = true; r->cache.arch_has_per_cpu_cfg = true; r->cache.min_cbm_bits = 0; } else if (r->rid == RDT_RESOURCE_MBA) { diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index b44c487727d4..ab45012288bb 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -113,8 +113,8 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) first_bit = find_first_bit(&val, cbm_len); zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); - /* Are non-contiguous bitmaps allowed? */ - if (!r->cache.arch_has_sparse_bitmaps && + /* Are non-contiguous bitmasks allowed? 
*/ + if (!r->cache.arch_has_sparse_bitmasks && (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) { rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val); return false; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 8334eeacfec5..66942d7fba7f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -94,7 +94,7 @@ struct rdt_domain { * zero CBM. * @shareable_bits: Bitmask of shareable resource with other * executing entities - * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. + * @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid. * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache * level has CPU scope. */ @@ -102,7 +102,7 @@ struct resctrl_cache { unsigned int cbm_len; unsigned int min_cbm_bits; unsigned int shareable_bits; - bool arch_has_sparse_bitmaps; + bool arch_has_sparse_bitmasks; bool arch_has_per_cpu_cfg; }; -- cgit v1.2.3 From fefba7d1ae198dcbf8b3b432de46a4e29f8dbd8c Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 11 Oct 2023 20:51:04 +0200 Subject: bpf: Propagate modified uaddrlen from cgroup sockaddr programs As prep for adding unix socket support to the cgroup sockaddr hooks, let's propagate the sockaddr length back to the caller after running a bpf cgroup sockaddr hook program. While not important for AF_INET or AF_INET6, the sockaddr length is important when working with AF_UNIX sockaddrs as the size of the sockaddr cannot be determined just from the address family or the sockaddr's contents. __cgroup_bpf_run_filter_sock_addr() is modified to take the uaddrlen as an input/output argument. After running the program, the modified sockaddr length is stored in the uaddrlen pointer. 
Signed-off-by: Daan De Meyer Link: https://lore.kernel.org/r/20231011185113.140426-3-daan.j.demeyer@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf-cgroup.h | 73 +++++++++++++++++++++++----------------------- include/linux/filter.h | 1 + kernel/bpf/cgroup.c | 17 +++++++++-- net/ipv4/af_inet.c | 7 +++-- net/ipv4/ping.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 9 ++++-- net/ipv6/af_inet6.c | 9 +++--- net/ipv6/ping.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 6 ++-- 11 files changed, 76 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8506690dbb9c..31561e789715 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -120,6 +120,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -230,22 +231,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +257,14 @@ static inline bool 
cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). */ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +277,29 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, 
uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -477,24 +478,24 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define 
BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/filter.h b/include/linux/filter.h index ff7ecc89d3dd..bcd2bc15ff56 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1335,6 +1335,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. 
*/ + u32 uaddrlen; }; struct bpf_sock_ops_kern { diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 03b3d4492980..ac37bd53aee0 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1450,6 +1450,9 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * provided by user sockaddr * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user + * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is + * read-only for AF_INET[6] uaddr but can be modified for AF_UNIX + * uaddr. * @atype: The type of program to be executed * @t_ctx: Pointer to attach type specific context * @flags: Pointer to u32 which contains higher bits of BPF program @@ -1462,6 +1465,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags) @@ -1473,6 +1477,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, }; struct sockaddr_storage unspec; struct cgroup *cgrp; + int ret; /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). 
@@ -1483,11 +1488,19 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, if (!ctx.uaddr) { memset(&unspec, 0, sizeof(unspec)); ctx.uaddr = (struct sockaddr *)&unspec; + ctx.uaddrlen = 0; + } else { + ctx.uaddrlen = *uaddrlen; } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, - 0, flags); + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, + 0, flags); + + if (!ret && uaddr) + *uaddrlen = ctx.uaddrlen; + + return ret; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3d2e30e20473..7e27ad37b939 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -452,7 +452,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET4_BIND, &flags); if (err) return err; @@ -788,6 +788,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); + int sin_addr_len = sizeof(*sin); sin->sin_family = AF_INET; lock_sock(sk); @@ -800,7 +801,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin_port = inet->inet_dport; sin->sin_addr.s_addr = inet->inet_daddr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETPEERNAME); } else { __be32 addr = inet->inet_rcv_saddr; @@ -808,7 +809,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, addr = inet->inet_saddr; sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, 
&sin_addr_len, CGROUP_INET4_GETSOCKNAME); } release_sock(sk); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 4dd809b7b188..2887177822c9 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f13eb7e23d03..7c18dd3ce011 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -194,7 +194,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len); } /* This will initiate an outgoing connection. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c3ff984b6354..7b21a51dd25a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1143,7 +1143,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, - (struct sockaddr *)usin, &ipc.addr); + (struct sockaddr *)usin, + &msg->msg_namelen, + &ipc.addr); if (err) goto out_free; if (usin) { @@ -1865,7 +1867,8 @@ try_again: *addr_len = sizeof(*sin); BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, - (struct sockaddr *)sin); + (struct sockaddr *)sin, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1904,7 +1907,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } EXPORT_SYMBOL(udp_pre_connect); diff --git a/net/ipv6/af_inet6.c 
b/net/ipv6/af_inet6.c index 6337fb4504fd..c35d302a3da9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -454,7 +454,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET6_BIND, &flags); if (err) return err; @@ -520,6 +520,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; + int sin_addr_len = sizeof(*sin); struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -539,7 +540,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_addr = sk->sk_v6_daddr; if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -547,13 +548,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); release_sock(sk); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet6_getname); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index e8fb0d275cc2..d2098dd4ceae 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return 
BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 94afb8d0f2d0..3a1e76a2d33e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -135,7 +135,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5e9312eefed0..622b10a549f7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -410,7 +410,8 @@ try_again: *addr_len = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, - (struct sockaddr *)sin6); + (struct sockaddr *)sin6, + addr_len); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -1157,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } /** @@ -1510,6 +1511,7 @@ do_udp_sendmsg: if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, (struct sockaddr *)sin6, + &addr_len, &fl6->saddr); if (err) goto out_no_dst; -- cgit v1.2.3 From 859051dd165ec6cc915f0f2114699021144fd249 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 11 Oct 2023 20:51:06 +0200 Subject: bpf: Implement cgroup sockaddr hooks for unix sockets These hooks allows intercepting connect(), getsockname(), getpeername(), sendmsg() and recvmsg() for unix sockets. The unix socket hooks get write access to the address length because the address length is not fixed when dealing with unix sockets and needs to be modified when a unix socket address is modified by the hook. 
Because abstract socket unix addresses start with a NUL byte, we cannot recalculate the socket address in kernelspace after running the hook by calculating the length of the unix socket path using strlen(). These hooks can be used when users want to multiplex syscalls to a single unix socket to multiple different processes behind the scenes by redirecting the connect() and other syscalls to process-specific sockets. We do not implement support for intercepting bind() because when using bind() with unix sockets with a pathname address, this creates an inode in the filesystem which must be cleaned up. If we rewrite the address, the user might try to clean up the wrong file, leaking the socket in the filesystem where it is never cleaned up. Until we figure out a solution for this (and a use case for intercepting bind()), we opt to not allow rewriting the sockaddr in bind() calls. We also implement recvmsg() support for connected streams so that after a connect() that is modified by a sockaddr hook, any corresponding recvmsg() on the connected socket can also be modified to make the connected program think it is connected to the "intended" remote. 
Reviewed-by: Kuniyuki Iwashima Signed-off-by: Daan De Meyer Link: https://lore.kernel.org/r/20231011185113.140426-5-daan.j.demeyer@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf-cgroup-defs.h | 5 +++++ include/linux/bpf-cgroup.h | 17 +++++++++++++++++ include/uapi/linux/bpf.h | 13 +++++++++---- kernel/bpf/cgroup.c | 11 +++++++++-- kernel/bpf/syscall.c | 15 +++++++++++++++ kernel/bpf/verifier.c | 5 ++++- net/core/filter.c | 14 ++++++++++++-- net/unix/af_unix.c | 35 ++++++++++++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 13 +++++++++---- 9 files changed, 114 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 7b121bd780eb..0985221d5478 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_BIND, CGROUP_INET4_CONNECT, CGROUP_INET6_CONNECT, + CGROUP_UNIX_CONNECT, CGROUP_INET4_POST_BIND, CGROUP_INET6_POST_BIND, CGROUP_UDP4_SENDMSG, CGROUP_UDP6_SENDMSG, + CGROUP_UNIX_SENDMSG, CGROUP_SYSCTL, CGROUP_UDP4_RECVMSG, CGROUP_UDP6_RECVMSG, + CGROUP_UNIX_RECVMSG, CGROUP_GETSOCKOPT, CGROUP_SETSOCKOPT, CGROUP_INET4_GETPEERNAME, CGROUP_INET6_GETPEERNAME, + CGROUP_UNIX_GETPEERNAME, CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, + CGROUP_UNIX_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 31561e789715..98b8cea904fe 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET6_BIND); CGROUP_ATYPE(CGROUP_INET4_CONNECT); CGROUP_ATYPE(CGROUP_INET6_CONNECT); + CGROUP_ATYPE(CGROUP_UNIX_CONNECT); CGROUP_ATYPE(CGROUP_INET4_POST_BIND); CGROUP_ATYPE(CGROUP_INET6_POST_BIND); CGROUP_ATYPE(CGROUP_UDP4_SENDMSG); 
CGROUP_ATYPE(CGROUP_UDP6_SENDMSG); + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG); CGROUP_ATYPE(CGROUP_SYSCTL); CGROUP_ATYPE(CGROUP_UDP4_RECVMSG); CGROUP_ATYPE(CGROUP_UDP6_RECVMSG); + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG); CGROUP_ATYPE(CGROUP_GETSOCKOPT); CGROUP_ATYPE(CGROUP_SETSOCKOPT); CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; @@ -289,18 +294,27 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) + #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) + #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) + /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot 
be traced by * sk_to_full_sk(). @@ -492,10 +506,13 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e0aa457f94a9..7ba61b75bc0e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1047,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2704,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. 
* * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2943,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ac37bd53aee0..74ad2215e1ba 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1458,7 +1458,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * @flags: Pointer to u32 which contains higher bits of BPF program * return value (OR'ed together). * - * socket is expected to be of type INET or INET6. + * socket is expected to be of type INET, INET6 or UNIX. * * This function will return %-EPERM if an attached program is found and * returned value != 1 during execution. In all other cases, 0 is returned. @@ -1482,7 +1482,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). 
*/ - if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 && + sk->sk_family != AF_UNIX) return 0; if (!ctx.uaddr) { @@ -2533,10 +2534,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_get_retval_proto; @@ -2548,10 +2552,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_set_retval_proto; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6b5280f14a53..8677837f3deb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2446,14 +2446,19 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return 0; default: return -EINVAL; @@ -3678,14 +3683,19 @@ attach_type_to_prog_type(enum 
bpf_attach_type attach_type) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; case BPF_CGROUP_SOCK_OPS: return BPF_PROG_TYPE_SOCK_OPS; @@ -3942,14 +3952,19 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET6_POST_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: case BPF_CGROUP_SYSCTL: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eed7350e15f4..e777f50401b6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14797,10 +14797,13 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME || env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME || 
env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || - env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) + env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME) range = tnum_range(1, 1); if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND || env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND) diff --git a/net/core/filter.c b/net/core/filter.c index ff0bd9f20b95..cc2e4babc85f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7875,14 +7875,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_setsockopt_proto; default: return NULL; @@ -7893,14 +7898,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_getsockopt_proto; default: return NULL; @@ -8948,8 +8958,8 @@ static bool sock_addr_is_valid_access(int off, int size, if 
(off % size != 0) return false; - /* Disallow access to IPv6 fields from IPv4 contex and vise - * versa. + /* Disallow access to fields not belonging to the attach type's address + * family. */ switch (off) { case bpf_ctx_range(struct bpf_sock_addr, user_ip4): diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3e8a04a13668..e10d07c76044 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,6 +116,7 @@ #include #include #include +#include #include "scm.h" @@ -1381,6 +1382,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !unix_sk(sk)->addr) { @@ -1490,6 +1495,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { err = unix_autobind(sk); @@ -1770,6 +1779,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); + + if (peer) + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETPEERNAME); + else + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETSOCKNAME); } sock_put(sk); out: @@ -1922,6 +1938,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_validate_addr(sunaddr, msg->msg_namelen); if (err) goto out; + + err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen, + NULL); + if (err) + goto out; } else { sunaddr = NULL; err = -ENOTCONN; @@ -2390,9 +2413,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); - if (msg->msg_name) + if (msg->msg_name) { 
unix_copy_addr(msg, skb->sk); + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen); + } + if (size > skb->len - skip) size = skb->len - skip; else if (size < skb->len - skip) @@ -2744,6 +2772,11 @@ unlock: DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, state->msg->msg_name); unix_copy_addr(state->msg, skb->sk); + + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + state->msg->msg_name, + &state->msg->msg_namelen); + sunaddr = NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e0aa457f94a9..7ba61b75bc0e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1047,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2704,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2943,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. 
* It supports the same set of *optname*\ s that is supported by -- cgit v1.2.3 From 0a779003213b589d7b3eef72f69e19a30f603ebc Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Mon, 9 Oct 2023 15:37:51 +0200 Subject: netdev: make napi_schedule return bool on NAPI successful schedule Change napi_schedule to return a bool on NAPI successful schedule. This might be useful for some driver to do additional steps after a NAPI has been scheduled. Suggested-by: Eric Dumazet Signed-off-by: Christian Marangi Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231009133754.9834-2-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 11d704bfec9b..2874770a0a74 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -490,11 +490,18 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. + * Return true if we schedule a NAPI or false if not. + * Refer to napi_schedule_prep() for additional reason on why + * a NAPI might not be scheduled. */ -static inline void napi_schedule(struct napi_struct *n) +static inline bool napi_schedule(struct napi_struct *n) { - if (napi_schedule_prep(n)) + if (napi_schedule_prep(n)) { __napi_schedule(n); + return true; + } + + return false; } /** -- cgit v1.2.3 From 73382e919f3d938554dadd01d95760f90d1c25c1 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Mon, 9 Oct 2023 15:37:52 +0200 Subject: netdev: replace napi_reschedule with napi_schedule Now that napi_schedule return a bool, we can drop napi_reschedule that does the same exact function. The function comes from a very old commit bfe13f54f502 ("ibm_emac: Convert to use napi_struct independent of struct net_device") and the purpose is actually deprecated in favour of different logic. 
Convert every user of napi_reschedule to napi_schedule. Signed-off-by: Christian Marangi Acked-by: Jeff Johnson # ath10k Acked-by: Nick Child # ibm Acked-by: Marc Kleine-Budde # for can/dev/rx-offload.c Reviewed-by: Eric Dumazet Acked-by: Tariq Toukan Link: https://lore.kernel.org/r/20231009133754.9834-3-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4 ++-- drivers/net/can/dev/rx-offload.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 2 +- drivers/net/ethernet/ezchip/nps_enet.c | 2 +- drivers/net/ethernet/google/gve/gve_main.c | 2 +- drivers/net/ethernet/ibm/ehea/ehea_main.c | 2 +- drivers/net/ethernet/ibm/emac/mal.c | 2 +- drivers/net/ethernet/ibm/ibmveth.c | 2 +- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- drivers/net/ethernet/ni/nixge.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c | 2 +- drivers/net/ethernet/xscale/ixp4xx_eth.c | 4 ++-- drivers/net/fjes/fjes_main.c | 2 +- drivers/net/wan/ixp4xx_hss.c | 4 ++-- drivers/net/wireless/ath/ath10k/pci.c | 2 +- drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c | 2 +- include/linux/netdevice.h | 10 ---------- 19 files changed, 21 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index ed25061fac62..7f84d9866cef 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -488,7 +488,7 @@ poll_more: if (unlikely(ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && - napi_reschedule(napi)) + napi_schedule(napi)) goto poll_more; } @@ -518,7 +518,7 @@ poll_more: napi_complete(napi); if (unlikely(ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && - napi_reschedule(napi)) + napi_schedule(napi)) goto poll_more; } return n < 0 ? 
0 : n; diff --git a/drivers/net/can/dev/rx-offload.c b/drivers/net/can/dev/rx-offload.c index 77091f7d1fa7..46e7b6db4a1e 100644 --- a/drivers/net/can/dev/rx-offload.c +++ b/drivers/net/can/dev/rx-offload.c @@ -67,7 +67,7 @@ static int can_rx_offload_napi_poll(struct napi_struct *napi, int quota) /* Check if there was another interrupt */ if (!skb_queue_empty(&offload->skb_queue)) - napi_reschedule(&offload->napi); + napi_schedule(&offload->napi); } return work_done; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 98dd78551d89..b5ff2e1a9975 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -4261,7 +4261,7 @@ static void sge_rx_timer_cb(struct timer_list *t) if (fl_starving(adap, fl)) { rxq = container_of(fl, struct sge_eth_rxq, fl); - if (napi_reschedule(&rxq->rspq.napi)) + if (napi_schedule(&rxq->rspq.napi)) fl->starving++; else set_bit(id, s->starving_fl); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 2d0cf76fb3c5..5b1d746e6563 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2094,7 +2094,7 @@ static void sge_rx_timer_cb(struct timer_list *t) struct sge_eth_rxq *rxq; rxq = container_of(fl, struct sge_eth_rxq, fl); - if (napi_reschedule(&rxq->rspq.napi)) + if (napi_schedule(&rxq->rspq.napi)) fl->starving++; else set_bit(id, s->starving_fl); diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index edf000e7bab4..4d7184d46824 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -198,7 +198,7 @@ static int nps_enet_poll(struct napi_struct *napi, int budget) */ if (nps_enet_is_tx_pending(priv)) { nps_enet_reg_set(priv, NPS_ENET_REG_BUF_INT_ENABLE, 0); - napi_reschedule(napi); + napi_schedule(napi); } } diff --git a/drivers/net/ethernet/google/gve/gve_main.c 
b/drivers/net/ethernet/google/gve/gve_main.c index 83b09dcfafc4..276f996f95dc 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -281,7 +281,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) if (block->rx) reschedule |= gve_rx_work_pending(block->rx); - if (reschedule && napi_reschedule(napi)) + if (reschedule && napi_schedule(napi)) iowrite32be(GVE_IRQ_MASK, irq_doorbell); } return work_done; diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 251dedd55cfb..1e29e5c9a2df 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -900,7 +900,7 @@ static int ehea_poll(struct napi_struct *napi, int budget) if (!cqe && !cqe_skb) return rx; - if (!napi_reschedule(napi)) + if (!napi_schedule(napi)) return rx; cqe_skb = ehea_proc_cqes(pr, EHEA_POLL_MAX_CQES); diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 462646d1b817..2439f7e96e05 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -442,7 +442,7 @@ static int mal_poll(struct napi_struct *napi, int budget) if (unlikely(mc->ops->peek_rx(mc->dev) || test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) { MAL_DBG2(mal, "rotting packet" NL); - if (!napi_reschedule(napi)) + if (!napi_schedule(napi)) goto more_work; spin_lock_irqsave(&mal->lock, flags); diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index a8d79ee350f8..b5aef0b29efe 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1432,7 +1432,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) BUG_ON(lpar_rc != H_SUCCESS); if (ibmveth_rxq_pending_buffer(adapter) && - napi_reschedule(napi)) { + napi_schedule(napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); } diff --git 
a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cdf5251e5679..2094f413cbe4 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3519,7 +3519,7 @@ restart_poll: if (napi_complete_done(napi, frames_processed)) { enable_scrq_irq(adapter, rx_scrq); if (pending_scrq(adapter, rx_scrq)) { - if (napi_reschedule(napi)) { + if (napi_schedule(napi)) { disable_scrq_irq(adapter, rx_scrq); goto restart_poll; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 332472fe4990..a09b6e05337d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -400,7 +400,7 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) for (ring = 0; ring < priv->rx_ring_num; ring++) { if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) { local_bh_disable(); - napi_reschedule(&priv->rx_cq[ring]->napi); + napi_schedule(&priv->rx_cq[ring]->napi); local_bh_enable(); } } diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index f71a4f8bbb89..fa1f78b03cb2 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -683,7 +683,7 @@ static int nixge_poll(struct napi_struct *napi, int budget) if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { /* If there's more, reschedule, but clear */ nixge_dma_write_reg(priv, XAXIDMA_RX_SR_OFFSET, status); - napi_reschedule(napi); + napi_schedule(napi); } else { /* if not, turn on RX IRQs again ... 
*/ cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index f9e43fc32ee8..3ca1c2a816ff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -802,7 +802,7 @@ static int stmmac_test_flowctrl(struct stmmac_priv *priv) stmmac_start_rx(priv, priv->ioaddr, i); local_bh_disable(); - napi_reschedule(&ch->rx_napi); + napi_schedule(&ch->rx_napi); local_bh_enable(); } diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index c47108f2a0a0..531bf919aef5 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -723,9 +723,9 @@ static int eth_poll(struct napi_struct *napi, int budget) napi_complete(napi); qmgr_enable_irq(rxq); if (!qmgr_stat_below_low_watermark(rxq) && - napi_reschedule(napi)) { /* not empty again */ + napi_schedule(napi)) { /* not empty again */ #if DEBUG_RX - netdev_debug(dev, "eth_poll napi_reschedule succeeded\n"); + netdev_debug(dev, "eth_poll napi_schedule succeeded\n"); #endif qmgr_disable_irq(rxq); continue; diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 2513be6d4e11..cd8cf08477ec 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1030,7 +1030,7 @@ static int fjes_poll(struct napi_struct *napi, int budget) } if (((long)jiffies - (long)adapter->rx_last_jiffies) < 3) { - napi_reschedule(napi); + napi_schedule(napi); } else { spin_lock(&hw->rx_status_lock); for (epidx = 0; epidx < hw->max_epid; epidx++) { diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index e46b7f5ee49e..b09f4c235142 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -687,10 +687,10 @@ static int hss_hdlc_poll(struct napi_struct *napi, int budget) napi_complete(napi); 
qmgr_enable_irq(rxq); if (!qmgr_stat_empty(rxq) && - napi_reschedule(napi)) { + napi_schedule(napi)) { #if DEBUG_RX printk(KERN_DEBUG "%s: hss_hdlc_poll" - " napi_reschedule succeeded\n", + " napi_schedule succeeded\n", dev->name); #endif qmgr_disable_irq(rxq); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 23f366221939..2f8c785277af 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -3148,7 +3148,7 @@ static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget) * immediate servicing. */ if (ath10k_ce_interrupt_summary(ar)) { - napi_reschedule(ctx); + napi_schedule(ctx); goto out; } ath10k_pci_enable_legacy_irq(ar); diff --git a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c index f4ff2198b5ef..210d84c67ef9 100644 --- a/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c +++ b/drivers/net/wwan/t7xx/t7xx_hif_dpmaif_rx.c @@ -852,7 +852,7 @@ int t7xx_dpmaif_napi_rx_poll(struct napi_struct *napi, const int budget) if (!ret) { napi_complete_done(napi, work_done); rxq->sleep_lock_pending = true; - napi_reschedule(napi); + napi_schedule(napi); return work_done; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2874770a0a74..ae553f886796 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -516,16 +516,6 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) __napi_schedule_irqoff(n); } -/* Try to reschedule poll. Called by dev->poll() after napi_complete(). 
*/ -static inline bool napi_reschedule(struct napi_struct *napi) -{ - if (napi_schedule_prep(napi)) { - __napi_schedule(napi); - return true; - } - return false; -} - /** * napi_complete_done - NAPI processing complete * @n: NAPI context -- cgit v1.2.3 From b9f29205c0182a2059b4dfa2883db5ef423574d4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Sep 2023 15:14:27 -0700 Subject: iosys-map: fix kernel-doc typos Correct spelling of "beginning". Signed-off-by: Randy Dunlap Cc: Thomas Zimmermann Cc: dri-devel@lists.freedesktop.org Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230930221428.18463-2-rdunlap@infradead.org --- include/linux/iosys-map.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index cb71aa616bd3..1b06d074ade0 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -426,7 +426,7 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * iosys_map_rd_field - Read a member from a struct in the iosys_map * * @map__: The iosys_map structure - * @struct_offset__: Offset from the beggining of the map, where the struct + * @struct_offset__: Offset from the beginning of the map, where the struct * is located * @struct_type__: The struct describing the layout of the mapping * @field__: Member of the struct to read @@ -494,7 +494,7 @@ static inline void iosys_map_memset(struct iosys_map *dst, size_t offset, * iosys_map_wr_field - Write to a member of a struct in the iosys_map * * @map__: The iosys_map structure - * @struct_offset__: Offset from the beggining of the map, where the struct + * @struct_offset__: Offset from the beginning of the map, where the struct * is located * @struct_type__: The struct describing the layout of the mapping * @field__: Member of the struct to read -- cgit v1.2.3 From 668706b10c9b8181a53bd8881a77bb81b328ab33 Mon 
Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Oct 2023 23:26:35 +0300 Subject: gpiolib: provide gpio_device_find_by_fwnode() One of the ways of looking up GPIO devices is using their fwnode. Provide a helper for that to avoid every user implementing their own matching function. Reviewed-by: Dipen Patel Tested-by: Dipen Patel Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20231010151709.4104747-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 20 ++++++++++++++++++++ include/linux/gpio/driver.h | 1 + 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 191f9c87b4d0..dd18dd56c33c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1145,6 +1145,26 @@ struct gpio_device *gpio_device_find_by_label(const char *label) } EXPORT_SYMBOL_GPL(gpio_device_find_by_label); +static int gpio_chip_match_by_fwnode(struct gpio_chip *gc, void *fwnode) +{ + return device_match_fwnode(&gc->gpiodev->dev, fwnode); +} + +/** + * gpio_device_find_by_fwnode() - wrapper around gpio_device_find() finding + * the GPIO device by its fwnode + * @fwnode: Firmware node to lookup + * + * Returns: + * Reference to the GPIO device or NULL. Reference must be released with + * gpio_device_put(). 
+ */ +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + return gpio_device_find((void *)fwnode, gpio_chip_match_by_fwnode); +} +EXPORT_SYMBOL_GPL(gpio_device_find_by_fwnode); + /** * gpio_device_get() - Increase the reference count of this GPIO device * @gdev: GPIO device to increase the refcount for diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index f8ad7f40100c..ae4162d3f1d3 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -611,6 +611,7 @@ struct gpio_chip *gpiochip_find(void *data, struct gpio_device *gpio_device_find(void *data, int (*match)(struct gpio_chip *gc, void *data)); struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); -- cgit v1.2.3 From e001d1447cd4585d7f23a44ff668ba2bc624badb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Oct 2023 15:24:17 +0300 Subject: fs: factor out vfs_parse_monolithic_sep() helper Factor out vfs_parse_monolithic_sep() from generic_parse_monolithic(), so filesystems could use it with a custom option separator callback. Acked-by: Christian Brauner Signed-off-by: Amir Goldstein --- fs/fs_context.c | 34 +++++++++++++++++++++++++++++----- include/linux/fs_context.h | 2 ++ 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_context.c b/fs/fs_context.c index a0ad7a0c4680..98589aae5208 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -192,17 +192,19 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key, EXPORT_SYMBOL(vfs_parse_fs_string); /** - * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * vfs_parse_monolithic_sep - Parse key[=val][,key[=val]]* mount data * @fc: The superblock configuration to fill in. 
* @data: The data to parse + * @sep: callback for separating next option * - * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be - * called from the ->monolithic_mount_data() fs_context operation. + * Parse a blob of data that's in key[=val][,key[=val]]* form with a custom + * option separator callback. * * Returns 0 on success or the error returned by the ->parse_option() fs_context * operation on failure. */ -int generic_parse_monolithic(struct fs_context *fc, void *data) +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)) { char *options = data, *key; int ret = 0; @@ -214,7 +216,7 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) if (ret) return ret; - while ((key = strsep(&options, ",")) != NULL) { + while ((key = sep(&options)) != NULL) { if (*key) { size_t v_len = 0; char *value = strchr(key, '='); @@ -233,6 +235,28 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) return ret; } +EXPORT_SYMBOL(vfs_parse_monolithic_sep); + +static char *vfs_parse_comma_sep(char **s) +{ + return strsep(s, ","); +} + +/** + * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * @fc: The superblock configuration to fill in. + * @data: The data to parse + * + * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be + * called from the ->monolithic_mount_data() fs_context operation. + * + * Returns 0 on success or the error returned by the ->parse_option() fs_context + * operation on failure. 
+ */ +int generic_parse_monolithic(struct fs_context *fc, void *data) +{ + return vfs_parse_monolithic_sep(fc, data, vfs_parse_comma_sep); +} EXPORT_SYMBOL(generic_parse_monolithic); /** diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 96332db693d5..c13e99cbbf81 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -136,6 +136,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size); +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); -- cgit v1.2.3 From 13cc9ee8f8ed58e563294d87d74a62006be40f21 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 12 Oct 2023 13:09:02 -0400 Subject: cgroup: Fix incorrect css_set_rwsem reference in comment Since commit f0d9a5f17575 ("cgroup: make css_set_rwsem a spinlock and rename it to css_set_lock"), css_set_rwsem has been replaced by css_set_lock. That commit, however, missed the css_set_rwsem reference in include/linux/cgroup-defs.h. Fix that by changing it to css_set_lock as well. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index f1b3151ac30b..265da00a1a8b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -238,7 +238,7 @@ struct css_set { * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the * process of being migrated out or in. 
Protected by - * css_set_rwsem, but, during migration, once tasks are moved to + * css_set_lock, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; -- cgit v1.2.3 From f06cc667f79909e9175460b167c277b7c64d3df0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Oct 2023 23:04:25 +0200 Subject: perf: Optimize perf_cgroup_switch() Namhyung reported that bd2756811766 ("perf: Rewrite core context handling") regresses context switch overhead when perf-cgroup is in use together with 'slow' PMUs like uncore. Specifically, perf_cgroup_switch()'s perf_ctx_disable() / ctx_sched_out() etc.. all iterate the full list of active PMUs for that CPU, even if they don't have cgroup events. Previously there was cgrp_cpuctx_list which linked the relevant PMUs together, but that got lost in the rework. Instead of re-introducing a similar list, let the perf_event_pmu_context iteration skip those that do not have cgroup events. This avoids growing multiple versions of the perf_event_pmu_context iteration.
Measured performance (on a slightly different patch): Before) $ taskset -c 0 ./perf bench sched pipe -l 10000 -G AAA,BBB # Running 'sched/pipe' benchmark: # Executed 10000 pipe operations between two processes Total time: 0.901 [sec] 90.128700 usecs/op 11095 ops/sec After) $ taskset -c 0 ./perf bench sched pipe -l 10000 -G AAA,BBB # Running 'sched/pipe' benchmark: # Executed 10000 pipe operations between two processes Total time: 0.065 [sec] 6.560100 usecs/op 152436 ops/sec Fixes: bd2756811766 ("perf: Rewrite core context handling") Reported-by: Namhyung Kim Debugged-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231009210425.GC6307@noisy.programming.kicks-ass.net --- include/linux/perf_event.h | 1 + kernel/events/core.c | 115 +++++++++++++++++++++++---------------------- 2 files changed, 61 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f31f962a6445..0367d748fae0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -878,6 +878,7 @@ struct perf_event_pmu_context { unsigned int embedded : 1; unsigned int nr_events; + unsigned int nr_cgroups; atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; diff --git a/kernel/events/core.c b/kernel/events/core.c index 708d474c2ede..3eb26c2c6e65 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -375,6 +375,7 @@ enum event_type_t { EVENT_TIME = 0x4, /* see ctx_resched() for details */ EVENT_CPU = 0x8, + EVENT_CGROUP = 0x10, EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, }; @@ -684,20 +685,26 @@ do { \ ___p; \ }) -static void perf_ctx_disable(struct perf_event_context *ctx) +static void perf_ctx_disable(struct perf_event_context *ctx, bool cgroup) { struct perf_event_pmu_context *pmu_ctx; - list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) + list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { + if (cgroup && !pmu_ctx->nr_cgroups) + 
continue; perf_pmu_disable(pmu_ctx->pmu); + } } -static void perf_ctx_enable(struct perf_event_context *ctx) +static void perf_ctx_enable(struct perf_event_context *ctx, bool cgroup) { struct perf_event_pmu_context *pmu_ctx; - list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) + list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { + if (cgroup && !pmu_ctx->nr_cgroups) + continue; perf_pmu_enable(pmu_ctx->pmu); + } } static void ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type); @@ -856,9 +863,9 @@ static void perf_cgroup_switch(struct task_struct *task) return; perf_ctx_lock(cpuctx, cpuctx->task_ctx); - perf_ctx_disable(&cpuctx->ctx); + perf_ctx_disable(&cpuctx->ctx, true); - ctx_sched_out(&cpuctx->ctx, EVENT_ALL); + ctx_sched_out(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP); /* * must not be done before ctxswout due * to update_cgrp_time_from_cpuctx() in @@ -870,9 +877,9 @@ static void perf_cgroup_switch(struct task_struct *task) * perf_cgroup_set_timestamp() in ctx_sched_in() * to not have to pass task around */ - ctx_sched_in(&cpuctx->ctx, EVENT_ALL); + ctx_sched_in(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP); - perf_ctx_enable(&cpuctx->ctx); + perf_ctx_enable(&cpuctx->ctx, true); perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } @@ -965,6 +972,8 @@ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ct if (!is_cgroup_event(event)) return; + event->pmu_ctx->nr_cgroups++; + /* * Because cgroup events are always per-cpu events, * @ctx == &cpuctx->ctx. @@ -985,6 +994,8 @@ perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *c if (!is_cgroup_event(event)) return; + event->pmu_ctx->nr_cgroups--; + /* * Because cgroup events are always per-cpu events, * @ctx == &cpuctx->ctx. 
@@ -2677,9 +2688,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx, event_type &= EVENT_ALL; - perf_ctx_disable(&cpuctx->ctx); + perf_ctx_disable(&cpuctx->ctx, false); if (task_ctx) { - perf_ctx_disable(task_ctx); + perf_ctx_disable(task_ctx, false); task_ctx_sched_out(task_ctx, event_type); } @@ -2697,9 +2708,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx, perf_event_sched_in(cpuctx, task_ctx); - perf_ctx_enable(&cpuctx->ctx); + perf_ctx_enable(&cpuctx->ctx, false); if (task_ctx) - perf_ctx_enable(task_ctx); + perf_ctx_enable(task_ctx, false); } void perf_pmu_resched(struct pmu *pmu) @@ -3244,6 +3255,9 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_event_pmu_context *pmu_ctx; int is_active = ctx->is_active; + bool cgroup = event_type & EVENT_CGROUP; + + event_type &= ~EVENT_CGROUP; lockdep_assert_held(&ctx->lock); @@ -3290,8 +3304,11 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) is_active ^= ctx->is_active; /* changed bits */ - list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) + list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { + if (cgroup && !pmu_ctx->nr_cgroups) + continue; __pmu_ctx_sched_out(pmu_ctx, is_active); + } } /* @@ -3482,7 +3499,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next) raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); if (context_equiv(ctx, next_ctx)) { - perf_ctx_disable(ctx); + perf_ctx_disable(ctx, false); /* PMIs are disabled; ctx->nr_pending is stable. 
*/ if (local_read(&ctx->nr_pending) || @@ -3502,7 +3519,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next) perf_ctx_sched_task_cb(ctx, false); perf_event_swap_task_ctx_data(ctx, next_ctx); - perf_ctx_enable(ctx); + perf_ctx_enable(ctx, false); /* * RCU_INIT_POINTER here is safe because we've not @@ -3526,13 +3543,13 @@ unlock: if (do_switch) { raw_spin_lock(&ctx->lock); - perf_ctx_disable(ctx); + perf_ctx_disable(ctx, false); inside_switch: perf_ctx_sched_task_cb(ctx, false); task_ctx_sched_out(ctx, EVENT_ALL); - perf_ctx_enable(ctx); + perf_ctx_enable(ctx, false); raw_spin_unlock(&ctx->lock); } } @@ -3818,47 +3835,32 @@ static int merge_sched_in(struct perf_event *event, void *data) return 0; } -static void ctx_pinned_sched_in(struct perf_event_context *ctx, struct pmu *pmu) +static void pmu_groups_sched_in(struct perf_event_context *ctx, + struct perf_event_groups *groups, + struct pmu *pmu) { - struct perf_event_pmu_context *pmu_ctx; int can_add_hw = 1; - - if (pmu) { - visit_groups_merge(ctx, &ctx->pinned_groups, - smp_processor_id(), pmu, - merge_sched_in, &can_add_hw); - } else { - list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { - can_add_hw = 1; - visit_groups_merge(ctx, &ctx->pinned_groups, - smp_processor_id(), pmu_ctx->pmu, - merge_sched_in, &can_add_hw); - } - } + visit_groups_merge(ctx, groups, smp_processor_id(), pmu, + merge_sched_in, &can_add_hw); } -static void ctx_flexible_sched_in(struct perf_event_context *ctx, struct pmu *pmu) +static void ctx_groups_sched_in(struct perf_event_context *ctx, + struct perf_event_groups *groups, + bool cgroup) { struct perf_event_pmu_context *pmu_ctx; - int can_add_hw = 1; - if (pmu) { - visit_groups_merge(ctx, &ctx->flexible_groups, - smp_processor_id(), pmu, - merge_sched_in, &can_add_hw); - } else { - list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { - can_add_hw = 1; - visit_groups_merge(ctx, &ctx->flexible_groups, - smp_processor_id(), 
pmu_ctx->pmu, - merge_sched_in, &can_add_hw); - } + list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { + if (cgroup && !pmu_ctx->nr_cgroups) + continue; + pmu_groups_sched_in(ctx, groups, pmu_ctx->pmu); } } -static void __pmu_ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu) +static void __pmu_ctx_sched_in(struct perf_event_context *ctx, + struct pmu *pmu) { - ctx_flexible_sched_in(ctx, pmu); + pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu); } static void @@ -3866,6 +3868,9 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type) { struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); int is_active = ctx->is_active; + bool cgroup = event_type & EVENT_CGROUP; + + event_type &= ~EVENT_CGROUP; lockdep_assert_held(&ctx->lock); @@ -3898,11 +3903,11 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type) * in order to give them the best chance of going on. */ if (is_active & EVENT_PINNED) - ctx_pinned_sched_in(ctx, NULL); + ctx_groups_sched_in(ctx, &ctx->pinned_groups, cgroup); /* Then walk through the lower prio flexible groups */ if (is_active & EVENT_FLEXIBLE) - ctx_flexible_sched_in(ctx, NULL); + ctx_groups_sched_in(ctx, &ctx->flexible_groups, cgroup); } static void perf_event_context_sched_in(struct task_struct *task) @@ -3917,11 +3922,11 @@ static void perf_event_context_sched_in(struct task_struct *task) if (cpuctx->task_ctx == ctx) { perf_ctx_lock(cpuctx, ctx); - perf_ctx_disable(ctx); + perf_ctx_disable(ctx, false); perf_ctx_sched_task_cb(ctx, true); - perf_ctx_enable(ctx); + perf_ctx_enable(ctx, false); perf_ctx_unlock(cpuctx, ctx); goto rcu_unlock; } @@ -3934,7 +3939,7 @@ static void perf_event_context_sched_in(struct task_struct *task) if (!ctx->nr_events) goto unlock; - perf_ctx_disable(ctx); + perf_ctx_disable(ctx, false); /* * We want to keep the following priority order: * cpu pinned (that don't need to move), task pinned, @@ -3944,7 +3949,7 @@ static void 
perf_event_context_sched_in(struct task_struct *task) * events, no need to flip the cpuctx's events around. */ if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) { - perf_ctx_disable(&cpuctx->ctx); + perf_ctx_disable(&cpuctx->ctx, false); ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE); } @@ -3953,9 +3958,9 @@ static void perf_event_context_sched_in(struct task_struct *task) perf_ctx_sched_task_cb(cpuctx->task_ctx, true); if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) - perf_ctx_enable(&cpuctx->ctx); + perf_ctx_enable(&cpuctx->ctx, false); - perf_ctx_enable(ctx); + perf_ctx_enable(ctx, false); unlock: perf_ctx_unlock(cpuctx, ctx); -- cgit v1.2.3 From f995443f01b4dbcce723539b99050ce69b319e58 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 12 Oct 2023 16:31:58 +0200 Subject: locking/seqlock: Simplify SEQCOUNT_LOCKNAME() 1. Kill the "lockmember" argument. It is always s->lock plus __seqprop_##lockname##_sequence() already uses s->lock and ignores "lockmember". 2. Kill the "lock_acquire" argument. __seqprop_##lockname##_sequence() can use the same "lockbase" prefix for _lock and _unlock. Apart from line numbers, gcc -E outputs the same code. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Waiman Long Cc: Will Deacon Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Paul E. 
McKenney Link: https://lore.kernel.org/r/20231012143158.GA16133@redhat.com --- include/linux/seqlock.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index af518e4d0c6b..7e7109dbc3f5 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -191,11 +191,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above locktype - * @lockmember: argument for lockdep_assert_held() - * @lockbase: associated lock release function (prefix only) - * @lock_acquire: associated lock acquisition function (full call) + * @lockbase: prefix for associated lock/unlock */ -#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ typedef struct seqcount_##lockname { \ seqcount_t seqcount; \ __SEQ_LOCK(locktype *lock); \ @@ -216,7 +214,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ return seq; \ \ if (preemptible && unlikely(seq & 1)) { \ - __SEQ_LOCK(lock_acquire); \ + __SEQ_LOCK(lockbase##_lock(s->lock)); \ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ \ /* \ @@ -242,7 +240,7 @@ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ static __always_inline void \ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ - __SEQ_LOCK(lockdep_assert_held(lockmember)); \ + __SEQ_LOCK(lockdep_assert_held(s->lock)); \ } /* @@ -271,10 +269,10 @@ static inline void __seqprop_assert(const seqcount_t *s) #define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) -SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(rwlock, 
rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) -SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t -- cgit v1.2.3 From e6115c6f7a0ce3388cc60b69a284facf78b5dbfd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 12 Oct 2023 16:32:27 +0200 Subject: locking/seqlock: Change __seqprop() to return the function pointer This simplifies the macro and makes it easy to add the new seqprop's with 2 or more args. Plus this way we do not lose the type info, the (void*) type cast is no longer needed. And the latter reveals the problem: a lot of seqcount_t helpers pass the "const seqcount_t *s" argument to __seqprop_ptr(seqcount_t *s) but (before this patch) "(void *)(s)" masked the problem. So this patch changes __seqprop_ptr() and __seqprop_##lockname##_ptr() to accept the "const LOCKNAME *s" argument. This is not nice either, they need to drop the constness on return because these helpers are used by both the readers and writers, but at least it is clear what's going on. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Waiman Long Cc: Will Deacon Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Paul E. 
McKenney Link: https://lore.kernel.org/r/20231012143227.GA16143@redhat.com --- include/linux/seqlock.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 7e7109dbc3f5..4b8dcd3a0d93 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -200,9 +200,9 @@ typedef struct seqcount_##lockname { \ } seqcount_##lockname##_t; \ \ static __always_inline seqcount_t * \ -__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ +__seqprop_##lockname##_ptr(const seqcount_##lockname##_t *s) \ { \ - return &s->seqcount; \ + return (void *)&s->seqcount; /* drop const */ \ } \ \ static __always_inline unsigned \ @@ -247,9 +247,9 @@ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ * __seqprop() for seqcount_t */ -static inline seqcount_t *__seqprop_ptr(seqcount_t *s) +static inline seqcount_t *__seqprop_ptr(const seqcount_t *s) { - return s; + return (void *)s; /* drop const */ } static inline unsigned __seqprop_sequence(const seqcount_t *s) @@ -292,19 +292,19 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ - seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) + seqcount_##lockname##_t: __seqprop_##lockname##_##prop #define __seqprop(s, prop) _Generic(*(s), \ - seqcount_t: __seqprop_##prop((void *)(s)), \ + seqcount_t: __seqprop_##prop, \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ __seqprop_case((s), mutex, prop)) -#define seqprop_ptr(s) __seqprop(s, ptr) -#define seqprop_sequence(s) __seqprop(s, sequence) -#define seqprop_preemptible(s) __seqprop(s, preemptible) -#define seqprop_assert(s) __seqprop(s, assert) +#define seqprop_ptr(s) __seqprop(s, ptr)(s) +#define seqprop_sequence(s) __seqprop(s, sequence)(s) +#define 
seqprop_preemptible(s) __seqprop(s, preemptible)(s) +#define seqprop_assert(s) __seqprop(s, assert)(s) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier -- cgit v1.2.3 From 7cbabed16464ad980c8d80b601559b36e1bc0b0c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 31 Jul 2023 05:25:36 +0300 Subject: thunderbolt: Fix typo in enum tb_link_width kernel-doc Typo trasmitters -> transmitters. Signed-off-by: Mika Westerberg Signed-off-by: Gil Fine --- include/linux/thunderbolt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 02333f47c994..6151c210d987 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -175,7 +175,7 @@ void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); * enum tb_link_width - Thunderbolt/USB4 link width * @TB_LINK_WIDTH_SINGLE: Single lane link * @TB_LINK_WIDTH_DUAL: Dual lane symmetric link - * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 trasmitters + * @TB_LINK_WIDTH_ASYM_TX: Dual lane asymmetric Gen 4 link with 3 transmitters * @TB_LINK_WIDTH_ASYM_RX: Dual lane asymmetric Gen 4 link with 3 receivers */ enum tb_link_width { -- cgit v1.2.3 From 1559d14977b694570f010854b8192e6de034bc27 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Oct 2023 15:02:02 +0200 Subject: gpiolib: provide gpio_device_to_device() There are users in the kernel who need to retrieve the address of the struct device backing the GPIO device. Currently they needlessly poke in the internals of GPIOLIB. Add a dedicated getter function. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Peter Rosin Reviewed-by: Andy Shevchenko Acked-by: Linus Walleij --- drivers/gpio/gpiolib.c | 17 +++++++++++++++++ include/linux/gpio/driver.h | 2 ++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index dd18dd56c33c..7e297ae35f8c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1189,6 +1189,23 @@ void gpio_device_put(struct gpio_device *gdev) } EXPORT_SYMBOL_GPL(gpio_device_put); +/** + * gpio_device_to_device() - Retrieve the address of the underlying struct + * device. + * @gdev: GPIO device for which to return the address. + * + * This does not increase the reference count of the GPIO device nor the + * underlying struct device. + * + * Returns: + * Address of struct device backing this GPIO device. + */ +struct device *gpio_device_to_device(struct gpio_device *gdev) +{ + return &gdev->dev; +} +EXPORT_SYMBOL_GPL(gpio_device_to_device); + #ifdef CONFIG_GPIOLIB_IRQCHIP /* diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index ae4162d3f1d3..0e40098aa283 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -619,6 +619,8 @@ void gpio_device_put(struct gpio_device *gdev); DEFINE_FREE(gpio_device_put, struct gpio_device *, if (IS_ERR_OR_NULL(_T)) gpio_device_put(_T)); +struct device *gpio_device_to_device(struct gpio_device *gdev); + bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset); -- cgit v1.2.3 From 370232d096e3fe188e4596f77bc6560636bd40c1 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 11 Oct 2023 15:02:03 +0200 Subject: gpiolib: provide gpiod_to_gpio_device() Accessing struct gpio_chip backing a GPIO device is only allowed for the actual providers of that chip. 
Similarly to how we introduced gpio_device_find() in order to replace the abused gpiochip_find(), let's introduce a counterpart to gpiod_to_chip() that returns a reference to the GPIO device owning the descriptor. This is done in order to later remove gpiod_to_chip() entirely. Signed-off-by: Bartosz Golaszewski Reviewed-by: Peter Rosin Acked-by: Linus Walleij --- drivers/gpio/gpiolib.c | 21 +++++++++++++++++++++ include/linux/gpio/driver.h | 1 + 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7e297ae35f8c..9febaef6767d 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -220,6 +220,27 @@ struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) } EXPORT_SYMBOL_GPL(gpiod_to_chip); +/** + * gpiod_to_gpio_device() - Return the GPIO device to which this descriptor + * belongs. + * @desc: Descriptor for which to return the GPIO device. + * + * This *DOES NOT* increase the reference count of the GPIO device as it's + * expected that the descriptor is requested and the user already holds a + * reference to the device. + * + * Returns: + * Address of the GPIO device owning this descriptor. 
+ */ +struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) +{ + if (!desc) + return NULL; + + return desc->gdev; +} +EXPORT_SYMBOL_GPL(gpiod_to_gpio_device); + /** * gpio_device_get_chip() - Get the gpio_chip implementation of this GPIO device * @gdev: GPIO device diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 0e40098aa283..d231c4f31cb4 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -785,6 +785,7 @@ int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset); void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); +struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 8c85a102fc4e5c0c942c10677fa43f7a19baa92f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 5 Sep 2023 20:52:55 +0200 Subject: gpiolib: provide gpio_device_get_base() Let's start adding getters for the opaque struct gpio_device. Start with a function allowing to retrieve the base GPIO number. Signed-off-by: Bartosz Golaszewski Acked-by: Linus Walleij --- drivers/gpio/gpiolib.c | 13 +++++++++++++ include/linux/gpio/driver.h | 3 +++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 9febaef6767d..cbafcd95243e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -241,6 +241,19 @@ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) } EXPORT_SYMBOL_GPL(gpiod_to_gpio_device); +/** + * gpio_device_get_base() - Get the base GPIO number allocated by this device + * @gdev: GPIO device + * + * Returns: + * First GPIO number in the global GPIO numberspace for this device. 
+ */ +int gpio_device_get_base(struct gpio_device *gdev) +{ + return gdev->base; +} +EXPORT_SYMBOL_GPL(gpio_device_get_base); + /** * gpio_device_get_chip() - Get the gpio_chip implementation of this GPIO device * @gdev: GPIO device diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index d231c4f31cb4..1d454dc944b3 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -787,6 +787,9 @@ void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset); struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); +/* struct gpio_device getters */ +int gpio_device_get_base(struct gpio_device *gdev); + #else /* CONFIG_GPIOLIB */ #include -- cgit v1.2.3 From 384461abcab6602abc06c2dfb8fb99beeeaa12b0 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 4 Aug 2023 16:27:06 +0200 Subject: pwm: Manage owner assignment implicitly for drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of requiring each driver to care for assigning the owner member of struct pwm_ops, handle that implicitly using a macro. Note that the owner member has to be moved to struct pwm_chip, as the ops structure usually lives in read-only memory and so cannot be modified. The upside is that new low level drivers cannot forget the assignment and save one line each. The pwm-crc driver didn't assign .owner, that's not a problem in practice though as the driver cannot be compiled as a module. 
Acked-by: Andy Shevchenko # Intel LPSS Reviewed-by: Florian Fainelli # pwm-{bcm,brcm}*.c Acked-by: Jernej Skrabec # sun4i Acked-by: Andi Shyti Acked-by: Nobuhiro Iwamatsu # pwm-visconti Acked-by: Heiko Stuebner # pwm-rockchip Acked-by: Michael Walle # pwm-sl28cpld Acked-by: Neil Armstrong # pwm-meson Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20230804142707.412137-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/gpio/gpio-mvebu.c | 1 - drivers/gpu/drm/bridge/ti-sn65dsi86.c | 1 - drivers/leds/rgb/leds-qcom-lpg.c | 1 - drivers/pwm/core.c | 24 ++++++++++++++---------- drivers/pwm/pwm-ab8500.c | 1 - drivers/pwm/pwm-apple.c | 1 - drivers/pwm/pwm-atmel-hlcdc.c | 1 - drivers/pwm/pwm-atmel-tcb.c | 1 - drivers/pwm/pwm-atmel.c | 1 - drivers/pwm/pwm-bcm-iproc.c | 1 - drivers/pwm/pwm-bcm-kona.c | 1 - drivers/pwm/pwm-bcm2835.c | 1 - drivers/pwm/pwm-berlin.c | 1 - drivers/pwm/pwm-brcmstb.c | 1 - drivers/pwm/pwm-clk.c | 1 - drivers/pwm/pwm-clps711x.c | 1 - drivers/pwm/pwm-cros-ec.c | 1 - drivers/pwm/pwm-dwc.c | 1 - drivers/pwm/pwm-ep93xx.c | 1 - drivers/pwm/pwm-fsl-ftm.c | 1 - drivers/pwm/pwm-hibvt.c | 1 - drivers/pwm/pwm-img.c | 1 - drivers/pwm/pwm-imx-tpm.c | 1 - drivers/pwm/pwm-imx1.c | 1 - drivers/pwm/pwm-imx27.c | 1 - drivers/pwm/pwm-intel-lgm.c | 1 - drivers/pwm/pwm-iqs620a.c | 1 - drivers/pwm/pwm-jz4740.c | 1 - drivers/pwm/pwm-keembay.c | 1 - drivers/pwm/pwm-lp3943.c | 1 - drivers/pwm/pwm-lpc18xx-sct.c | 1 - drivers/pwm/pwm-lpc32xx.c | 1 - drivers/pwm/pwm-lpss.c | 1 - drivers/pwm/pwm-mediatek.c | 1 - drivers/pwm/pwm-meson.c | 1 - drivers/pwm/pwm-microchip-core.c | 1 - drivers/pwm/pwm-mtk-disp.c | 1 - drivers/pwm/pwm-mxs.c | 1 - drivers/pwm/pwm-ntxec.c | 1 - drivers/pwm/pwm-omap-dmtimer.c | 1 - drivers/pwm/pwm-pca9685.c | 1 - drivers/pwm/pwm-pxa.c | 1 - drivers/pwm/pwm-raspberrypi-poe.c | 1 - drivers/pwm/pwm-rcar.c | 1 - drivers/pwm/pwm-renesas-tpu.c | 1 - drivers/pwm/pwm-rockchip.c | 1 - 
drivers/pwm/pwm-rz-mtu3.c | 1 - drivers/pwm/pwm-samsung.c | 1 - drivers/pwm/pwm-sifive.c | 1 - drivers/pwm/pwm-sl28cpld.c | 1 - drivers/pwm/pwm-spear.c | 1 - drivers/pwm/pwm-sprd.c | 1 - drivers/pwm/pwm-sti.c | 1 - drivers/pwm/pwm-stm32-lp.c | 1 - drivers/pwm/pwm-stm32.c | 1 - drivers/pwm/pwm-stmpe.c | 1 - drivers/pwm/pwm-sun4i.c | 1 - drivers/pwm/pwm-sunplus.c | 1 - drivers/pwm/pwm-tegra.c | 1 - drivers/pwm/pwm-tiecap.c | 1 - drivers/pwm/pwm-tiehrpwm.c | 1 - drivers/pwm/pwm-twl-led.c | 2 -- drivers/pwm/pwm-twl.c | 2 -- drivers/pwm/pwm-visconti.c | 1 - drivers/pwm/pwm-vt8500.c | 1 - drivers/pwm/pwm-xilinx.c | 1 - drivers/staging/greybus/pwm.c | 1 - include/linux/pwm.h | 10 ++++++---- 68 files changed, 20 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 67497116ce27..64acd497a341 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -756,7 +756,6 @@ static const struct pwm_ops mvebu_pwm_ops = { .free = mvebu_pwm_free, .get_state = mvebu_pwm_get_state, .apply = mvebu_pwm_apply, - .owner = THIS_MODULE, }; static void __maybe_unused mvebu_pwm_suspend(struct mvebu_gpio_chip *mvchip) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index f448b903e190..08e8c4f333e3 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1580,7 +1580,6 @@ static const struct pwm_ops ti_sn_pwm_ops = { .free = ti_sn_pwm_free, .apply = ti_sn_pwm_apply, .get_state = ti_sn_pwm_get_state, - .owner = THIS_MODULE, }; static int ti_sn_pwm_probe(struct auxiliary_device *adev, diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c index df469aaa7e6e..c8525f59748c 100644 --- a/drivers/leds/rgb/leds-qcom-lpg.c +++ b/drivers/leds/rgb/leds-qcom-lpg.c @@ -1085,7 +1085,6 @@ static const struct pwm_ops lpg_pwm_ops = { .request = lpg_pwm_request, .apply = lpg_pwm_apply, .get_state = 
lpg_pwm_get_state, - .owner = THIS_MODULE, }; static int lpg_add_pwm(struct lpg *lpg) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index dc66e3405bf5..a2824eb4e236 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -89,13 +89,13 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label) if (test_bit(PWMF_REQUESTED, &pwm->flags)) return -EBUSY; - if (!try_module_get(pwm->chip->ops->owner)) + if (!try_module_get(pwm->chip->owner)) return -ENODEV; if (pwm->chip->ops->request) { err = pwm->chip->ops->request(pwm->chip, pwm); if (err) { - module_put(pwm->chip->ops->owner); + module_put(pwm->chip->owner); return err; } } @@ -253,14 +253,16 @@ static bool pwm_ops_check(const struct pwm_chip *chip) } /** - * pwmchip_add() - register a new PWM chip + * __pwmchip_add() - register a new PWM chip * @chip: the PWM chip to add + * @owner: reference to the module providing the chip. * - * Register a new PWM chip. + * Register a new PWM chip. @owner is supposed to be THIS_MODULE, use the + * pwmchip_add wrapper to do this right. * * Returns: 0 on success or a negative error code on failure. 
*/ -int pwmchip_add(struct pwm_chip *chip) +int __pwmchip_add(struct pwm_chip *chip, struct module *owner) { struct pwm_device *pwm; unsigned int i; @@ -272,6 +274,8 @@ int pwmchip_add(struct pwm_chip *chip) if (!pwm_ops_check(chip)) return -EINVAL; + chip->owner = owner; + chip->pwms = kcalloc(chip->npwm, sizeof(*pwm), GFP_KERNEL); if (!chip->pwms) return -ENOMEM; @@ -306,7 +310,7 @@ int pwmchip_add(struct pwm_chip *chip) return 0; } -EXPORT_SYMBOL_GPL(pwmchip_add); +EXPORT_SYMBOL_GPL(__pwmchip_add); /** * pwmchip_remove() - remove a PWM chip @@ -338,17 +342,17 @@ static void devm_pwmchip_remove(void *data) pwmchip_remove(chip); } -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip) +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner) { int ret; - ret = pwmchip_add(chip); + ret = __pwmchip_add(chip, owner); if (ret) return ret; return devm_add_action_or_reset(dev, devm_pwmchip_remove, chip); } -EXPORT_SYMBOL_GPL(devm_pwmchip_add); +EXPORT_SYMBOL_GPL(__devm_pwmchip_add); /** * pwm_request_from_chip() - request a PWM device relative to a PWM chip @@ -979,7 +983,7 @@ void pwm_put(struct pwm_device *pwm) pwm_set_chip_data(pwm, NULL); pwm->label = NULL; - module_put(pwm->chip->ops->owner); + module_put(pwm->chip->owner); out: mutex_unlock(&pwm_lock); } diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c index 583a7d69c741..670d33daea84 100644 --- a/drivers/pwm/pwm-ab8500.c +++ b/drivers/pwm/pwm-ab8500.c @@ -181,7 +181,6 @@ static int ab8500_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops ab8500_pwm_ops = { .apply = ab8500_pwm_apply, .get_state = ab8500_pwm_get_state, - .owner = THIS_MODULE, }; static int ab8500_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-apple.c b/drivers/pwm/pwm-apple.c index 8e7d67fb5fbe..4d755b628d9e 100644 --- a/drivers/pwm/pwm-apple.c +++ b/drivers/pwm/pwm-apple.c @@ -99,7 +99,6 @@ static int apple_pwm_get_state(struct 
pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops apple_pwm_ops = { .apply = apple_pwm_apply, .get_state = apple_pwm_get_state, - .owner = THIS_MODULE, }; static int apple_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index e271d920151e..07920e034757 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -170,7 +170,6 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops atmel_hlcdc_pwm_ops = { .apply = atmel_hlcdc_pwm_apply, - .owner = THIS_MODULE, }; static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_at91sam9x5_errata = { diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index c00dd37c5fbd..98b33c016c3c 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -364,7 +364,6 @@ static const struct pwm_ops atmel_tcb_pwm_ops = { .request = atmel_tcb_pwm_request, .free = atmel_tcb_pwm_free, .apply = atmel_tcb_pwm_apply, - .owner = THIS_MODULE, }; static struct atmel_tcb_config tcb_rm9200_config = { diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index 35ae876bf2ea..47bcc8a3bf9d 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -402,7 +402,6 @@ static int atmel_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops atmel_pwm_ops = { .apply = atmel_pwm_apply, .get_state = atmel_pwm_get_state, - .owner = THIS_MODULE, }; static const struct atmel_pwm_data atmel_sam9rl_pwm_data = { diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index 7d70b6f186a6..1da902440df2 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -183,7 +183,6 @@ static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops iproc_pwm_ops = { .apply = iproc_pwmc_apply, .get_state = iproc_pwmc_get_state, - .owner = THIS_MODULE, }; 
static int iproc_pwmc_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c index e5b00cc9f7a7..15d6ed03c3ce 100644 --- a/drivers/pwm/pwm-bcm-kona.c +++ b/drivers/pwm/pwm-bcm-kona.c @@ -269,7 +269,6 @@ static int kona_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops kona_pwm_ops = { .apply = kona_pwmc_apply, - .owner = THIS_MODULE, }; static int kona_pwmc_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c index bdfc2a5ec0d6..af318a35d510 100644 --- a/drivers/pwm/pwm-bcm2835.c +++ b/drivers/pwm/pwm-bcm2835.c @@ -129,7 +129,6 @@ static const struct pwm_ops bcm2835_pwm_ops = { .request = bcm2835_pwm_request, .free = bcm2835_pwm_free, .apply = bcm2835_pwm_apply, - .owner = THIS_MODULE, }; static int bcm2835_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c index 0971c666afd1..6a6641fc4fa1 100644 --- a/drivers/pwm/pwm-berlin.c +++ b/drivers/pwm/pwm-berlin.c @@ -205,7 +205,6 @@ static const struct pwm_ops berlin_pwm_ops = { .request = berlin_pwm_request, .free = berlin_pwm_free, .apply = berlin_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id berlin_pwm_match[] = { diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c index a3faa9a3de7c..96967f8a8dad 100644 --- a/drivers/pwm/pwm-brcmstb.c +++ b/drivers/pwm/pwm-brcmstb.c @@ -220,7 +220,6 @@ static int brcmstb_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops brcmstb_pwm_ops = { .apply = brcmstb_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id brcmstb_pwm_of_match[] = { diff --git a/drivers/pwm/pwm-clk.c b/drivers/pwm/pwm-clk.c index 0ee4d2aee4df..9dd88b386907 100644 --- a/drivers/pwm/pwm-clk.c +++ b/drivers/pwm/pwm-clk.c @@ -77,7 +77,6 @@ static int pwm_clk_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops 
pwm_clk_ops = { .apply = pwm_clk_apply, - .owner = THIS_MODULE, }; static int pwm_clk_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-clps711x.c b/drivers/pwm/pwm-clps711x.c index b0d91142da8d..42179b3f7ec3 100644 --- a/drivers/pwm/pwm-clps711x.c +++ b/drivers/pwm/pwm-clps711x.c @@ -72,7 +72,6 @@ static int clps711x_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops clps711x_pwm_ops = { .request = clps711x_pwm_request, .apply = clps711x_pwm_apply, - .owner = THIS_MODULE, }; static struct pwm_device *clps711x_pwm_xlate(struct pwm_chip *chip, diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c index baaac0c33aa0..500504c7d459 100644 --- a/drivers/pwm/pwm-cros-ec.c +++ b/drivers/pwm/pwm-cros-ec.c @@ -241,7 +241,6 @@ static const struct pwm_ops cros_ec_pwm_ops = { .free = cros_ec_pwm_free, .get_state = cros_ec_pwm_get_state, .apply = cros_ec_pwm_apply, - .owner = THIS_MODULE, }; /* diff --git a/drivers/pwm/pwm-dwc.c b/drivers/pwm/pwm-dwc.c index 3bbb26c862c3..53fe00ccd47e 100644 --- a/drivers/pwm/pwm-dwc.c +++ b/drivers/pwm/pwm-dwc.c @@ -195,7 +195,6 @@ static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops dwc_pwm_ops = { .apply = dwc_pwm_apply, .get_state = dwc_pwm_get_state, - .owner = THIS_MODULE, }; static struct dwc_pwm *dwc_pwm_alloc(struct device *dev) diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c index c45a75e65c86..51e072572a87 100644 --- a/drivers/pwm/pwm-ep93xx.c +++ b/drivers/pwm/pwm-ep93xx.c @@ -159,7 +159,6 @@ static const struct pwm_ops ep93xx_pwm_ops = { .request = ep93xx_pwm_request, .free = ep93xx_pwm_free, .apply = ep93xx_pwm_apply, - .owner = THIS_MODULE, }; static int ep93xx_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index b7c6045c5d08..d1b6d1aa4773 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -350,7 +350,6 @@ static const 
struct pwm_ops fsl_pwm_ops = { .request = fsl_pwm_request, .free = fsl_pwm_free, .apply = fsl_pwm_apply, - .owner = THIS_MODULE, }; static int fsl_pwm_init(struct fsl_pwm_chip *fpc) diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c index f7ba6fe9a349..c435776e2f78 100644 --- a/drivers/pwm/pwm-hibvt.c +++ b/drivers/pwm/pwm-hibvt.c @@ -185,7 +185,6 @@ static const struct pwm_ops hibvt_pwm_ops = { .get_state = hibvt_pwm_get_state, .apply = hibvt_pwm_apply, - .owner = THIS_MODULE, }; static int hibvt_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 326af85888e7..116fa060e302 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -208,7 +208,6 @@ static int img_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops img_pwm_ops = { .apply = img_pwm_apply, - .owner = THIS_MODULE, }; static const struct img_pwm_soc_data pistachio_pwm = { diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index 98ab65c89685..fe8bf598d388 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -332,7 +332,6 @@ static const struct pwm_ops imx_tpm_pwm_ops = { .free = pwm_imx_tpm_free, .get_state = pwm_imx_tpm_get_state, .apply = pwm_imx_tpm_apply, - .owner = THIS_MODULE, }; static int pwm_imx_tpm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-imx1.c b/drivers/pwm/pwm-imx1.c index 0651983bed19..d175d895f22a 100644 --- a/drivers/pwm/pwm-imx1.c +++ b/drivers/pwm/pwm-imx1.c @@ -146,7 +146,6 @@ static int pwm_imx1_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_imx1_ops = { .apply = pwm_imx1_apply, - .owner = THIS_MODULE, }; static const struct of_device_id pwm_imx1_dt_ids[] = { diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index 29a3089c534c..7d9bc43f12b0 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -296,7 +296,6 @@ static int pwm_imx27_apply(struct pwm_chip 
*chip, struct pwm_device *pwm, static const struct pwm_ops pwm_imx27_ops = { .apply = pwm_imx27_apply, .get_state = pwm_imx27_get_state, - .owner = THIS_MODULE, }; static const struct of_device_id pwm_imx27_dt_ids[] = { diff --git a/drivers/pwm/pwm-intel-lgm.c b/drivers/pwm/pwm-intel-lgm.c index 0cd7dd548e82..54ecae7f937e 100644 --- a/drivers/pwm/pwm-intel-lgm.c +++ b/drivers/pwm/pwm-intel-lgm.c @@ -107,7 +107,6 @@ static int lgm_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops lgm_pwm_ops = { .get_state = lgm_pwm_get_state, .apply = lgm_pwm_apply, - .owner = THIS_MODULE, }; static void lgm_pwm_init(struct lgm_pwm_chip *pc) diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index 47b3141135f3..378ab036edfe 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -166,7 +166,6 @@ static int iqs620_pwm_notifier(struct notifier_block *notifier, static const struct pwm_ops iqs620_pwm_ops = { .apply = iqs620_pwm_apply, .get_state = iqs620_pwm_get_state, - .owner = THIS_MODULE, }; static void iqs620_pwm_notifier_unregister(void *context) diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index 6b2124266273..49a6e28d1d98 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -216,7 +216,6 @@ static const struct pwm_ops jz4740_pwm_ops = { .request = jz4740_pwm_request, .free = jz4740_pwm_free, .apply = jz4740_pwm_apply, - .owner = THIS_MODULE, }; static int jz4740_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-keembay.c b/drivers/pwm/pwm-keembay.c index ac02d8bb4a0b..ac824ecc3f64 100644 --- a/drivers/pwm/pwm-keembay.c +++ b/drivers/pwm/pwm-keembay.c @@ -178,7 +178,6 @@ static int keembay_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } static const struct pwm_ops keembay_pwm_ops = { - .owner = THIS_MODULE, .apply = keembay_pwm_apply, .get_state = keembay_pwm_get_state, }; diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c 
index 4b133a17f4be..a1a106747372 100644 --- a/drivers/pwm/pwm-lp3943.c +++ b/drivers/pwm/pwm-lp3943.c @@ -216,7 +216,6 @@ static const struct pwm_ops lp3943_pwm_ops = { .request = lp3943_pwm_request, .free = lp3943_pwm_free, .apply = lp3943_pwm_apply, - .owner = THIS_MODULE, }; static int lp3943_pwm_parse_dt(struct device *dev, diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c index 7a19a840bca5..ef7d0da137ed 100644 --- a/drivers/pwm/pwm-lpc18xx-sct.c +++ b/drivers/pwm/pwm-lpc18xx-sct.c @@ -341,7 +341,6 @@ static const struct pwm_ops lpc18xx_pwm_ops = { .apply = lpc18xx_pwm_apply, .request = lpc18xx_pwm_request, .free = lpc18xx_pwm_free, - .owner = THIS_MODULE, }; static const struct of_device_id lpc18xx_pwm_of_match[] = { diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c index 806f0bb3ad6d..78f664e41e6e 100644 --- a/drivers/pwm/pwm-lpc32xx.c +++ b/drivers/pwm/pwm-lpc32xx.c @@ -115,7 +115,6 @@ static int lpc32xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops lpc32xx_pwm_ops = { .apply = lpc32xx_pwm_apply, - .owner = THIS_MODULE, }; static int lpc32xx_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 23fe332b2394..a6ea3ce7e019 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -243,7 +243,6 @@ static int pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_lpss_ops = { .apply = pwm_lpss_apply, .get_state = pwm_lpss_get_state, - .owner = THIS_MODULE, }; struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 6adb0ed01906..373abfd25acb 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -229,7 +229,6 @@ static int pwm_mediatek_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pwm_mediatek_ops = { .apply = 
pwm_mediatek_apply, - .owner = THIS_MODULE, }; static int pwm_mediatek_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 25519cddc2a9..5bea53243ed2 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -335,7 +335,6 @@ static const struct pwm_ops meson_pwm_ops = { .free = meson_pwm_free, .apply = meson_pwm_apply, .get_state = meson_pwm_get_state, - .owner = THIS_MODULE, }; static const char * const pwm_meson8b_parent_names[] = { diff --git a/drivers/pwm/pwm-microchip-core.c b/drivers/pwm/pwm-microchip-core.c index e7525c98105e..c0c53968f3e9 100644 --- a/drivers/pwm/pwm-microchip-core.c +++ b/drivers/pwm/pwm-microchip-core.c @@ -435,7 +435,6 @@ static int mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm static const struct pwm_ops mchp_core_pwm_ops = { .apply = mchp_core_pwm_apply, .get_state = mchp_core_pwm_get_state, - .owner = THIS_MODULE, }; static const struct of_device_id mchp_core_of_match[] = { diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c index a83bd6e18b07..7748e3eaa818 100644 --- a/drivers/pwm/pwm-mtk-disp.c +++ b/drivers/pwm/pwm-mtk-disp.c @@ -227,7 +227,6 @@ static int mtk_disp_pwm_get_state(struct pwm_chip *chip, static const struct pwm_ops mtk_disp_pwm_ops = { .apply = mtk_disp_pwm_apply, .get_state = mtk_disp_pwm_get_state, - .owner = THIS_MODULE, }; static int mtk_disp_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index 766dbc58dad8..1b5e787d78f1 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -115,7 +115,6 @@ static int mxs_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops mxs_pwm_ops = { .apply = mxs_pwm_apply, - .owner = THIS_MODULE, }; static int mxs_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-ntxec.c b/drivers/pwm/pwm-ntxec.c index 7514ea384ec5..78606039eda2 100644 --- a/drivers/pwm/pwm-ntxec.c +++ 
b/drivers/pwm/pwm-ntxec.c @@ -126,7 +126,6 @@ static int ntxec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm_dev, } static const struct pwm_ops ntxec_pwm_ops = { - .owner = THIS_MODULE, .apply = ntxec_pwm_apply, /* * No .get_state callback, because the current state cannot be read diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 94faa4650686..13161e08dd6e 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -311,7 +311,6 @@ unlock_mutex: static const struct pwm_ops pwm_omap_dmtimer_ops = { .apply = pwm_omap_dmtimer_apply, - .owner = THIS_MODULE, }; static int pwm_omap_dmtimer_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 3038a68412a7..e79b1de8c4d8 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -505,7 +505,6 @@ static const struct pwm_ops pca9685_pwm_ops = { .get_state = pca9685_pwm_get_state, .request = pca9685_pwm_request, .free = pca9685_pwm_free, - .owner = THIS_MODULE, }; static const struct regmap_config pca9685_regmap_i2c_config = { diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c index 1e475ed10180..faf6b35b9336 100644 --- a/drivers/pwm/pwm-pxa.c +++ b/drivers/pwm/pwm-pxa.c @@ -135,7 +135,6 @@ static int pxa_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops pxa_pwm_ops = { .apply = pxa_pwm_apply, - .owner = THIS_MODULE, }; #ifdef CONFIG_OF diff --git a/drivers/pwm/pwm-raspberrypi-poe.c b/drivers/pwm/pwm-raspberrypi-poe.c index 2939b71a7ba7..1ad814fdec6b 100644 --- a/drivers/pwm/pwm-raspberrypi-poe.c +++ b/drivers/pwm/pwm-raspberrypi-poe.c @@ -135,7 +135,6 @@ static int raspberrypi_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops raspberrypi_pwm_ops = { .get_state = raspberrypi_pwm_get_state, .apply = raspberrypi_pwm_apply, - .owner = THIS_MODULE, }; static int raspberrypi_pwm_probe(struct platform_device *pdev) diff 
--git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 5b5f357c44de..13269f55fccf 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -198,7 +198,6 @@ static const struct pwm_ops rcar_pwm_ops = { .request = rcar_pwm_request, .free = rcar_pwm_free, .apply = rcar_pwm_apply, - .owner = THIS_MODULE, }; static int rcar_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index d7311614c846..1b004e01829a 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -431,7 +431,6 @@ static const struct pwm_ops tpu_pwm_ops = { .request = tpu_pwm_request, .free = tpu_pwm_free, .apply = tpu_pwm_apply, - .owner = THIS_MODULE, }; /* ----------------------------------------------------------------------------- diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 03ee18fb82d5..cce4381e188a 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -228,7 +228,6 @@ out: static const struct pwm_ops rockchip_pwm_ops = { .get_state = rockchip_pwm_get_state, .apply = rockchip_pwm_apply, - .owner = THIS_MODULE, }; static const struct rockchip_pwm_data pwm_data_v1 = { diff --git a/drivers/pwm/pwm-rz-mtu3.c b/drivers/pwm/pwm-rz-mtu3.c index a56cecb0e46e..bdda315b3bd3 100644 --- a/drivers/pwm/pwm-rz-mtu3.c +++ b/drivers/pwm/pwm-rz-mtu3.c @@ -438,7 +438,6 @@ static const struct pwm_ops rz_mtu3_pwm_ops = { .free = rz_mtu3_pwm_free, .get_state = rz_mtu3_pwm_get_state, .apply = rz_mtu3_pwm_apply, - .owner = THIS_MODULE, }; static int rz_mtu3_pwm_pm_runtime_suspend(struct device *dev) diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index e8828f57ab15..08a903257383 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -475,7 +475,6 @@ static const struct pwm_ops pwm_samsung_ops = { .request = pwm_samsung_request, .free = pwm_samsung_free, .apply = pwm_samsung_apply, - .owner = THIS_MODULE, }; #ifdef CONFIG_OF diff 
--git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c index eabddb7c7820..089e50bdbbf0 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -203,7 +203,6 @@ static const struct pwm_ops pwm_sifive_ops = { .free = pwm_sifive_free, .get_state = pwm_sifive_get_state, .apply = pwm_sifive_apply, - .owner = THIS_MODULE, }; static int pwm_sifive_clock_notifier(struct notifier_block *nb, diff --git a/drivers/pwm/pwm-sl28cpld.c b/drivers/pwm/pwm-sl28cpld.c index 9e42e3a74ad6..88b01ff9e460 100644 --- a/drivers/pwm/pwm-sl28cpld.c +++ b/drivers/pwm/pwm-sl28cpld.c @@ -200,7 +200,6 @@ static int sl28cpld_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sl28cpld_pwm_ops = { .apply = sl28cpld_pwm_apply, .get_state = sl28cpld_pwm_get_state, - .owner = THIS_MODULE, }; static int sl28cpld_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c index 4e1cfd8d7c03..2cbc34cf6799 100644 --- a/drivers/pwm/pwm-spear.c +++ b/drivers/pwm/pwm-spear.c @@ -189,7 +189,6 @@ static int spear_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops spear_pwm_ops = { .apply = spear_pwm_apply, - .owner = THIS_MODULE, }; static int spear_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c index 1499c8c1fe37..dfda2152a7c1 100644 --- a/drivers/pwm/pwm-sprd.c +++ b/drivers/pwm/pwm-sprd.c @@ -210,7 +210,6 @@ static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sprd_pwm_ops = { .apply = sprd_pwm_apply, .get_state = sprd_pwm_get_state, - .owner = THIS_MODULE, }; static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc) diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index b1d1373648a3..5756f2e3b3c0 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -420,7 +420,6 @@ static const struct pwm_ops sti_pwm_ops = { .capture = sti_pwm_capture, .apply = 
sti_pwm_apply, .free = sti_pwm_free, - .owner = THIS_MODULE, }; static irqreturn_t sti_pwm_interrupt(int irq, void *data) diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index bb3a045a7334..b67974cc1872 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -189,7 +189,6 @@ static int stm32_pwm_lp_get_state(struct pwm_chip *chip, } static const struct pwm_ops stm32_pwm_lp_ops = { - .owner = THIS_MODULE, .apply = stm32_pwm_lp_apply, .get_state = stm32_pwm_lp_get_state, }; diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index 3d6be7749e23..3303a754ea02 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -487,7 +487,6 @@ static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm, } static const struct pwm_ops stm32pwm_ops = { - .owner = THIS_MODULE, .apply = stm32_pwm_apply_locked, .capture = IS_ENABLED(CONFIG_DMA_ENGINE) ? stm32_pwm_capture : NULL, }; diff --git a/drivers/pwm/pwm-stmpe.c b/drivers/pwm/pwm-stmpe.c index e205405c4828..a46f5b4dd816 100644 --- a/drivers/pwm/pwm-stmpe.c +++ b/drivers/pwm/pwm-stmpe.c @@ -287,7 +287,6 @@ static int stmpe_24xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops stmpe_24xx_pwm_ops = { .apply = stmpe_24xx_pwm_apply, - .owner = THIS_MODULE, }; static int __init stmpe_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index c84fcf1a13dc..1a439025540d 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -325,7 +325,6 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sun4i_pwm_ops = { .apply = sun4i_pwm_apply, .get_state = sun4i_pwm_get_state, - .owner = THIS_MODULE, }; static const struct sun4i_pwm_data sun4i_pwm_dual_nobypass = { diff --git a/drivers/pwm/pwm-sunplus.c b/drivers/pwm/pwm-sunplus.c index 7705c7b86c3a..773e2f80526e 100644 --- a/drivers/pwm/pwm-sunplus.c +++ 
b/drivers/pwm/pwm-sunplus.c @@ -163,7 +163,6 @@ static int sunplus_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops sunplus_pwm_ops = { .apply = sunplus_pwm_apply, .get_state = sunplus_pwm_get_state, - .owner = THIS_MODULE, }; static void sunplus_pwm_clk_release(void *data) diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index a169a34e0778..39ea51e08c94 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -268,7 +268,6 @@ static int tegra_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops tegra_pwm_ops = { .apply = tegra_pwm_apply, - .owner = THIS_MODULE, }; static int tegra_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index 8c94b266c1b2..11e3549cf103 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -205,7 +205,6 @@ static int ecap_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops ecap_pwm_ops = { .apply = ecap_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id ecap_of_match[] = { diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index ecbfd7e954ec..66ac2655845f 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -437,7 +437,6 @@ static int ehrpwm_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops ehrpwm_pwm_ops = { .free = ehrpwm_pwm_free, .apply = ehrpwm_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id ehrpwm_of_match[] = { diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c index 8fb84b441853..625233f4703a 100644 --- a/drivers/pwm/pwm-twl-led.c +++ b/drivers/pwm/pwm-twl-led.c @@ -189,7 +189,6 @@ static int twl4030_pwmled_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops twl4030_pwmled_ops = { .apply = twl4030_pwmled_apply, - .owner = THIS_MODULE, }; static int 
twl6030_pwmled_config(struct pwm_chip *chip, struct pwm_device *pwm, @@ -342,7 +341,6 @@ static const struct pwm_ops twl6030_pwmled_ops = { .apply = twl6030_pwmled_apply, .request = twl6030_pwmled_request, .free = twl6030_pwmled_free, - .owner = THIS_MODULE, }; static int twl_pwmled_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c index 86567add79db..603d31f27470 100644 --- a/drivers/pwm/pwm-twl.c +++ b/drivers/pwm/pwm-twl.c @@ -333,12 +333,10 @@ static const struct pwm_ops twl4030_pwm_ops = { .apply = twl4030_pwm_apply, .request = twl4030_pwm_request, .free = twl4030_pwm_free, - .owner = THIS_MODULE, }; static const struct pwm_ops twl6030_pwm_ops = { .apply = twl6030_pwm_apply, - .owner = THIS_MODULE, }; static int twl_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-visconti.c b/drivers/pwm/pwm-visconti.c index 7f7591a2384c..8d736d558122 100644 --- a/drivers/pwm/pwm-visconti.c +++ b/drivers/pwm/pwm-visconti.c @@ -129,7 +129,6 @@ static int visconti_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops visconti_pwm_ops = { .apply = visconti_pwm_apply, .get_state = visconti_pwm_get_state, - .owner = THIS_MODULE, }; static int visconti_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c index 6d46db51daac..a96c7f5d9099 100644 --- a/drivers/pwm/pwm-vt8500.c +++ b/drivers/pwm/pwm-vt8500.c @@ -221,7 +221,6 @@ static int vt8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops vt8500_pwm_ops = { .apply = vt8500_pwm_apply, - .owner = THIS_MODULE, }; static const struct of_device_id vt8500_pwm_dt_ids[] = { diff --git a/drivers/pwm/pwm-xilinx.c b/drivers/pwm/pwm-xilinx.c index 85153ee90809..5f3c2a6fed11 100644 --- a/drivers/pwm/pwm-xilinx.c +++ b/drivers/pwm/pwm-xilinx.c @@ -198,7 +198,6 @@ static int xilinx_pwm_get_state(struct pwm_chip *chip, static const struct pwm_ops xilinx_pwm_ops 
= { .apply = xilinx_pwm_apply, .get_state = xilinx_pwm_get_state, - .owner = THIS_MODULE, }; static const struct regmap_config xilinx_pwm_regmap_config = { diff --git a/drivers/staging/greybus/pwm.c b/drivers/staging/greybus/pwm.c index 57cc1960d059..a3cb68cfa0f9 100644 --- a/drivers/staging/greybus/pwm.c +++ b/drivers/staging/greybus/pwm.c @@ -258,7 +258,6 @@ static const struct pwm_ops gb_pwm_ops = { .request = gb_pwm_request, .free = gb_pwm_free, .apply = gb_pwm_apply, - .owner = THIS_MODULE, }; static int gb_pwm_probe(struct gbphy_device *gbphy_dev, diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d2f9f690a9c1..56e3b7a09824 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -267,7 +267,6 @@ struct pwm_capture { * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. - * @owner: helps prevent removal of modules exporting active PWMs */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); @@ -278,13 +277,13 @@ struct pwm_ops { const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state); - struct module *owner; }; /** * struct pwm_chip - abstract a PWM controller * @dev: device providing the PWMs * @ops: callbacks for this PWM controller + * @owner: module providing this chip * @base: number of first PWM controlled by this chip * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier @@ -295,6 +294,7 @@ struct pwm_ops { struct pwm_chip { struct device *dev; const struct pwm_ops *ops; + struct module *owner; int base; unsigned int npwm; @@ -386,10 +386,12 @@ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, int pwm_set_chip_data(struct pwm_device *pwm, void *data); void *pwm_get_chip_data(struct pwm_device *pwm); -int pwmchip_add(struct pwm_chip *chip); +int __pwmchip_add(struct pwm_chip *chip, struct module 
*owner); +#define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) void pwmchip_remove(struct pwm_chip *chip); -int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip); +int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner); +#define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE) struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, -- cgit v1.2.3 From a6e5654e0b8b53e2d0e316bc7cecb81dd8371f18 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 5 Jul 2023 10:06:50 +0200 Subject: pwm: Drop pwm_[sg]et_chip_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The semantic of chip_data is a bit surprising as it's cleared when pwm_put() is called. Also there is a big overlap with the standard driver data. All drivers were adapted to not make use of chip_data any more, so it can go away. Link: https://lore.kernel.org/r/20230705080650.2353391-9-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 31 ------------------------------- include/linux/pwm.h | 14 -------------- 2 files changed, 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index a2824eb4e236..29078486534d 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -208,36 +208,6 @@ static void of_pwmchip_remove(struct pwm_chip *chip) of_node_put(chip->dev->of_node); } -/** - * pwm_set_chip_data() - set private chip data for a PWM - * @pwm: PWM device - * @data: pointer to chip-specific data - * - * Returns: 0 on success or a negative error code on failure. 
- */ -int pwm_set_chip_data(struct pwm_device *pwm, void *data) -{ - if (!pwm) - return -EINVAL; - - pwm->chip_data = data; - - return 0; -} -EXPORT_SYMBOL_GPL(pwm_set_chip_data); - -/** - * pwm_get_chip_data() - get private chip data for a PWM - * @pwm: PWM device - * - * Returns: A pointer to the chip-private data for the PWM device. - */ -void *pwm_get_chip_data(struct pwm_device *pwm) -{ - return pwm ? pwm->chip_data : NULL; -} -EXPORT_SYMBOL_GPL(pwm_get_chip_data); - static bool pwm_ops_check(const struct pwm_chip *chip) { const struct pwm_ops *ops = chip->ops; @@ -980,7 +950,6 @@ void pwm_put(struct pwm_device *pwm) if (pwm->chip->ops->free) pwm->chip->ops->free(pwm->chip, pwm); - pwm_set_chip_data(pwm, NULL); pwm->label = NULL; module_put(pwm->chip->owner); diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 56e3b7a09824..e3b437587b32 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -71,7 +71,6 @@ struct pwm_state { * @hwpwm: per-chip relative index of the PWM device * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device - * @chip_data: chip-private data associated with the PWM device * @args: PWM arguments * @state: last applied state * @last: last implemented state (for PWM_DEBUG) @@ -82,7 +81,6 @@ struct pwm_device { unsigned int hwpwm; unsigned int pwm; struct pwm_chip *chip; - void *chip_data; struct pwm_args args; struct pwm_state state; @@ -383,8 +381,6 @@ static inline void pwm_disable(struct pwm_device *pwm) /* PWM provider APIs */ int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); -int pwm_set_chip_data(struct pwm_device *pwm, void *data); -void *pwm_get_chip_data(struct pwm_device *pwm); int __pwmchip_add(struct pwm_chip *chip, struct module *owner); #define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) @@ -447,16 +443,6 @@ static inline int pwm_capture(struct pwm_device *pwm, return -EINVAL; } -static inline int pwm_set_chip_data(struct pwm_device 
*pwm, void *data) -{ - return -EINVAL; -} - -static inline void *pwm_get_chip_data(struct pwm_device *pwm) -{ - return NULL; -} - static inline int pwmchip_add(struct pwm_chip *chip) { return -EINVAL; -- cgit v1.2.3 From 38985e8c278b82e6d4d62d4acd57c761cc23ce63 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 9 Oct 2023 13:06:08 +0300 Subject: net: Handle bulk delete policy in bridge driver The merge commit 92716869375b ("Merge branch 'br-flush-filtering'") added support for FDB flushing in bridge driver. The following patches will extend VXLAN driver to support FDB flushing as well. The netlink message for bulk delete is shared between the drivers. With the existing implementation, there is no way to prevent user from flushing with attributes that are not supported per driver. For example, when VNI will be added, user will not get an error for flush FDB entries in bridge with VNI, although this attribute is not relevant for bridge. As preparation for support of FDB flush in VXLAN driver, move the policy to be handled in bridge driver, later a new policy for VXLAN will be added in VXLAN driver. Do not pass 'vid' as part of ndo_fdb_del_bulk(), as this field is relevant only for bridge. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++------ net/bridge/br_fdb.c | 29 ++++++++++++++++++++++++----- net/bridge/br_private.h | 3 +-- net/core/rtnetlink.c | 27 ++++++++++----------------- 4 files changed, 37 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae553f886796..1c7681263d30 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1287,9 +1287,7 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. 
- * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], - * struct net_device *dev, - * u16 vid, + * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, @@ -1564,10 +1562,8 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); - int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, - struct nlattr *tb[], + int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, - u16 vid, struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e69a872bfc1d..a98ad763b368 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -661,14 +661,30 @@ static int __fdb_flush_validate_ifindex(const struct net_bridge *br, return 0; } -int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, u16 vid, +static const struct nla_policy br_fdb_del_bulk_policy[NDA_MAX + 1] = { + [NDA_VLAN] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), + [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, + [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, +}; + +int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack) { - u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS; - struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid }; + struct net_bridge_fdb_flush_desc desc = {}; + struct ndmsg *ndm = nlmsg_data(nlh); struct net_bridge_port *p = NULL; + struct nlattr *tb[NDA_MAX + 1]; struct net_bridge *br; + u8 ndm_flags; + int err; + + ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, + br_fdb_del_bulk_policy, extack); + if (err) + return err; if 
(netif_is_bridge_master(dev)) { br = netdev_priv(dev); @@ -681,6 +697,9 @@ int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], br = p->br; } + if (tb[NDA_VLAN]) + desc.vlan_id = nla_get_u16(tb[NDA_VLAN]); + if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) { NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set"); return -EINVAL; @@ -703,7 +722,7 @@ int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask); } if (tb[NDA_IFINDEX]) { - int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]); + int ifidx = nla_get_s32(tb[NDA_IFINDEX]); err = __fdb_flush_validate_ifindex(br, ifidx, extack); if (err) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a1f4acfa6994..cbbe35278459 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -847,8 +847,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, struct netlink_ext_ack *extack); -int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, u16 vid, +int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7452a6d190c5..eef7f7788996 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4367,13 +4367,6 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_del); -static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = { - [NDA_VLAN] = { .type = NLA_U16 }, - [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), - [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, - [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, -}; - static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct 
netlink_ext_ack *extack) { @@ -4394,8 +4387,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } else { - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, - fdb_del_bulk_policy, extack); + /* For bulk delete, the drivers will parse the message with + * policy. + */ + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } if (err < 0) return err; @@ -4418,6 +4413,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); + + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); + if (err) + return err; } if (dev->type != ARPHRD_ETHER) { @@ -4425,10 +4424,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); - if (err) - return err; - err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -4442,8 +4437,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); } else { if (ops->ndo_fdb_del_bulk) - err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, - extack); + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (err) @@ -4464,8 +4458,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* in case err was cleared by NTF_MASTER call */ err = -EOPNOTSUPP; if (ops->ndo_fdb_del_bulk) - err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, - extack); + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (!err) { -- cgit v1.2.3 From 03adc61edad49e1bbecfb53f7ea5d78f398fe368 Mon Sep 17 00:00:00 2001 From: Dan Clash Date: Thu, 12 Oct 2023 14:55:18 -0700 Subject: audit,io_uring: io_uring openat triggers audit reference count underflow An io_uring openat operation can update an audit reference count from multiple threads resulting in the call trace below. 
A call to io_uring_submit() with a single openat op with a flag of IOSQE_ASYNC results in the following reference count updates. The first part of the system call performs two increments that do not race. do_syscall_64() __do_sys_io_uring_enter() io_submit_sqes() io_openat_prep() __io_openat_prep() getname() getname_flags() /* update 1 (increment) */ __audit_getname() /* update 2 (increment) */ The openat op is queued to an io_uring worker thread which starts the opportunity for a race. The system call exit performs one decrement. do_syscall_64() syscall_exit_to_user_mode() syscall_exit_to_user_mode_prepare() __audit_syscall_exit() audit_reset_context() putname() /* update 3 (decrement) */ The io_uring worker thread performs one increment and two decrements. These updates can race with the system call decrement. io_wqe_worker() io_worker_handle_work() io_wq_submit_work() io_issue_sqe() io_openat() io_openat2() do_filp_open() path_openat() __audit_inode() /* update 4 (increment) */ putname() /* update 5 (decrement) */ __audit_uring_exit() audit_reset_context() putname() /* update 6 (decrement) */ The fix is to change the refcnt member of struct filename from int to atomic_t. kernel BUG at fs/namei.c:262! Call Trace: ... ? putname+0x68/0x70 audit_reset_context.part.0.constprop.0+0xe1/0x300 __audit_uring_exit+0xda/0x1c0 io_issue_sqe+0x1f3/0x450 ? lock_timer_base+0x3b/0xd0 io_wq_submit_work+0x8d/0x2b0 ? 
__try_to_del_timer_sync+0x67/0xa0 io_worker_handle_work+0x17c/0x2b0 io_wqe_worker+0x10a/0x350 Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/MW2PR2101MB1033FFF044A258F84AEAA584F1C9A@MW2PR2101MB1033.namprd21.prod.outlook.com/ Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring") Signed-off-by: Dan Clash Link: https://lore.kernel.org/r/20231012215518.GA4048@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- fs/namei.c | 9 +++++---- include/linux/fs.h | 2 +- kernel/auditsc.c | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 567ee547492b..94565bd7e73f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -188,7 +188,7 @@ getname_flags(const char __user *filename, int flags, int *empty) } } - result->refcnt = 1; + atomic_set(&result->refcnt, 1); /* The empty path is special. */ if (unlikely(!len)) { if (empty) @@ -249,7 +249,7 @@ getname_kernel(const char * filename) memcpy((char *)result->name, filename, len); result->uptr = NULL; result->aname = NULL; - result->refcnt = 1; + atomic_set(&result->refcnt, 1); audit_getname(result); return result; @@ -261,9 +261,10 @@ void putname(struct filename *name) if (IS_ERR(name)) return; - BUG_ON(name->refcnt <= 0); + if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) + return; - if (--name->refcnt > 0) + if (!atomic_dec_and_test(&name->refcnt)) return; if (name->name != name->iname) { diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ff..4a40823c3c67 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2403,7 +2403,7 @@ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ - int refcnt; + atomic_t refcnt; struct audit_names *aname; const char iname[]; }; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 
21d2fa815e78..6f0d6fb6523f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2212,7 +2212,7 @@ __audit_reusename(const __user char *uptr) if (!n->name) continue; if (n->name->uptr == uptr) { - n->name->refcnt++; + atomic_inc(&n->name->refcnt); return n->name; } } @@ -2241,7 +2241,7 @@ void __audit_getname(struct filename *name) n->name = name; n->name_len = AUDIT_NAME_FULL; name->aname = n; - name->refcnt++; + atomic_inc(&name->refcnt); } static inline int audit_copy_fcaps(struct audit_names *name, @@ -2373,7 +2373,7 @@ out_alloc: return; if (name) { n->name = name; - name->refcnt++; + atomic_inc(&name->refcnt); } out: @@ -2500,7 +2500,7 @@ void __audit_inode_child(struct inode *parent, if (found_parent) { found_child->name = found_parent->name; found_child->name_len = AUDIT_NAME_FULL; - found_child->name->refcnt++; + atomic_inc(&found_child->name->refcnt); } } -- cgit v1.2.3 From 236334aeec0f93217cf9235f2004e61a0a1a5985 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 13 Oct 2023 08:39:16 +0000 Subject: bpf: Avoid unnecessary audit log for CPU security mitigations Check cpu_mitigations_off() first to avoid calling capable() if it is off. This can avoid unnecessary audit log. 
Fixes: bc5bc309db45 ("bpf: Inherit system settings for CPU security mitigations") Suggested-by: Andrii Nakryiko Signed-off-by: Yafang Shao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/CAEf4Bza6UVUWqcWQ-66weZ-nMDr+TFU3Mtq=dumZFD-pSqU7Ow@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20231013083916.4199-1-laoar.shao@gmail.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 61bde4520f5c..f0891ba24cb1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2164,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void) static inline bool bpf_bypass_spec_v1(void) { - return perfmon_capable() || cpu_mitigations_off(); + return cpu_mitigations_off() || perfmon_capable(); } static inline bool bpf_bypass_spec_v4(void) { - return perfmon_capable() || cpu_mitigations_off(); + return cpu_mitigations_off() || perfmon_capable(); } int bpf_map_new_fd(struct bpf_map *map, int flags); -- cgit v1.2.3 From 84aefafe6b294041b7fa0757414c4a29c1bdeea2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Sep 2023 15:14:26 -0700 Subject: clk: linux/clk-provider.h: fix kernel-doc warnings and typos Fix spelling of "Structure". 
Fix multiple kernel-doc warnings: clk-provider.h:269: warning: Function parameter or member 'recalc_rate' not described in 'clk_ops' clk-provider.h:468: warning: Function parameter or member 'parent_data' not described in 'clk_hw_register_fixed_rate_with_accuracy_parent_data' clk-provider.h:468: warning: Excess function parameter 'parent_name' description in 'clk_hw_register_fixed_rate_with_accuracy_parent_data' clk-provider.h:482: warning: Function parameter or member 'parent_data' not described in 'clk_hw_register_fixed_rate_parent_accuracy' clk-provider.h:482: warning: Excess function parameter 'parent_name' description in 'clk_hw_register_fixed_rate_parent_accuracy' clk-provider.h:687: warning: Function parameter or member 'flags' not described in 'clk_divider' clk-provider.h:1164: warning: Function parameter or member 'flags' not described in 'clk_fractional_divider' clk-provider.h:1164: warning: Function parameter or member 'approximation' not described in 'clk_fractional_divider' clk-provider.h:1213: warning: Function parameter or member 'flags' not described in 'clk_multiplier' Fixes: 9fba738a53dd ("clk: add duty cycle support") Fixes: b2476490ef11 ("clk: introduce the common clock framework") Fixes: 2d34f09e79c9 ("clk: fixed-rate: Add support for specifying parents via DT/pointers") Fixes: f5290d8e4f0c ("clk: asm9260: use parent index to link the reference clock") Fixes: 9d9f78ed9af0 ("clk: basic clock hardware types") Fixes: e2d0e90fae82 ("clk: new basic clk type for fractional divider") Fixes: f2e0a53271a4 ("clk: Add a basic multiplier clock") Signed-off-by: Randy Dunlap Cc: Michael Turquette Cc: Stephen Boyd Cc: linux-clk@vger.kernel.org Link: https://lore.kernel.org/r/20230930221428.18463-1-rdunlap@infradead.org Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 
ec32ec58c59f..ace3a4ce2fc9 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -74,7 +74,7 @@ void clk_hw_forward_rate_request(const struct clk_hw *core, unsigned long parent_rate); /** - * struct clk_duty - Struture encoding the duty cycle ratio of a clock + * struct clk_duty - Structure encoding the duty cycle ratio of a clock * * @num: Numerator of the duty cycle ratio * @den: Denominator of the duty cycle ratio @@ -129,7 +129,7 @@ struct clk_duty { * @restore_context: Restore the context of the clock after a restoration * of power. * - * @recalc_rate Recalculate the rate of this clock, by querying hardware. The + * @recalc_rate: Recalculate the rate of this clock, by querying hardware. The * parent rate is an input parameter. It is up to the caller to * ensure that the prepare_mutex is held across this call. If the * driver cannot figure out a rate for this clock, it must return @@ -456,7 +456,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, * clock with the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate * @fixed_accuracy: non-adjustable clock accuracy @@ -471,7 +471,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, * the clock framework * @dev: device that is registering this clock * @name: name of this clock - * @parent_name: name of clock's parent + * @parent_data: name of clock's parent * @flags: framework-specific flags * @fixed_rate: non-adjustable clock rate */ @@ -649,7 +649,7 @@ struct clk_div_table { * Clock with an adjustable divider affecting its output frequency. Implements * .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_DIVIDER_ONE_BASED - by default the divisor is the value read from the * register plus one. 
If CLK_DIVIDER_ONE_BASED is set then the divider is * the raw value read from the register, with the value of zero considered @@ -1130,11 +1130,12 @@ struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev, * @mwidth: width of the numerator bit field * @nshift: shift to the denominator bit field * @nwidth: width of the denominator bit field + * @approximation: clk driver's callback for calculating the divider clock * @lock: register lock * * Clock with adjustable fractional divider affecting its output frequency. * - * Flags: + * @flags: * CLK_FRAC_DIVIDER_ZERO_BASED - by default the numerator and denominator * is the value read from the register. If CLK_FRAC_DIVIDER_ZERO_BASED * is set then the numerator and denominator are both the value read @@ -1191,7 +1192,7 @@ void clk_hw_unregister_fractional_divider(struct clk_hw *hw); * Clock with an adjustable multiplier affecting its output frequency. * Implements .recalc_rate, .set_rate and .round_rate * - * Flags: + * @flags: * CLK_MULTIPLIER_ZERO_BYPASS - By default, the multiplier is the value read * from the register, with 0 being a valid value effectively * zeroing the output clock rate. If CLK_MULTIPLIER_ZERO_BYPASS is -- cgit v1.2.3 From 787650cc335201a0489905c5504a9179470ebc51 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Oct 2023 20:04:12 -0700 Subject: Input: Annotate struct ff_device with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct ff_device. Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/r/20231006201739.work.350-kees@kernel.org Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 49790c1bd2c4..de6503c0edb8 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -562,7 +562,7 @@ struct ff_device { int max_effects; struct ff_effect *effects; - struct file *effect_owners[]; + struct file *effect_owners[] __counted_by(max_effects); }; int input_ff_create(struct input_dev *dev, unsigned int max_effects); -- cgit v1.2.3 From 5b90073defd1a52aa8120403d79f6e0fc10c87ee Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sun, 8 Oct 2023 20:36:17 +0800 Subject: crypto: hisilicon/qm - alloc buffer to set and get xqc If the temporarily applied memory is used to set or get the xqc information, the driver releases the memory immediately after the hardware mailbox operation time exceeds the driver waiting time. However, the hardware does not cancel the operation, so the hardware may write data to released memory. Therefore, when the driver is bound to a device, the driver reserves memory for the xqc configuration. The subsequent xqc configuration uses the reserved memory to prevent hardware from accessing the released memory. 
Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/debugfs.c | 75 ++++---- drivers/crypto/hisilicon/qm.c | 332 ++++++++++++++++------------------- drivers/crypto/hisilicon/qm_common.h | 5 +- include/linux/hisi_acc_qm.h | 13 ++ 4 files changed, 191 insertions(+), 234 deletions(-) (limited to 'include/linux') diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index 2cc1591949db..7e8186fe0512 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -137,8 +137,8 @@ static void dump_show(struct hisi_qm *qm, void *info, static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; - struct qm_sqc *sqc, *sqc_curr; - dma_addr_t sqc_dma; + struct qm_sqc *sqc_curr; + struct qm_sqc sqc; u32 qp_id; int ret; @@ -151,35 +151,29 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) return -EINVAL; } - sqc = hisi_qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma); - if (IS_ERR(sqc)) - return PTR_ERR(sqc); + ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1); + if (!ret) { + dump_show(qm, &sqc, sizeof(struct qm_sqc), name); - ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 1); - if (ret) { - down_read(&qm->qps_lock); - if (qm->sqc) { - sqc_curr = qm->sqc + qp_id; + return 0; + } - dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC"); - } - up_read(&qm->qps_lock); + down_read(&qm->qps_lock); + if (qm->sqc) { + sqc_curr = qm->sqc + qp_id; - goto free_ctx; + dump_show(qm, sqc_curr, sizeof(*sqc_curr), "SOFT SQC"); } + up_read(&qm->qps_lock); - dump_show(qm, sqc, sizeof(*sqc), name); - -free_ctx: - hisi_qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma); return 0; } static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; - struct qm_cqc *cqc, *cqc_curr; - dma_addr_t cqc_dma; + struct qm_cqc *cqc_curr; + struct qm_cqc cqc; u32 qp_id; int ret; @@ -192,34 +186,29 @@ static int qm_cqc_dump(struct 
hisi_qm *qm, char *s, char *name) return -EINVAL; } - cqc = hisi_qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma); - if (IS_ERR(cqc)) - return PTR_ERR(cqc); + ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1); + if (!ret) { + dump_show(qm, &cqc, sizeof(struct qm_cqc), name); - ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 1); - if (ret) { - down_read(&qm->qps_lock); - if (qm->cqc) { - cqc_curr = qm->cqc + qp_id; + return 0; + } - dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC"); - } - up_read(&qm->qps_lock); + down_read(&qm->qps_lock); + if (qm->cqc) { + cqc_curr = qm->cqc + qp_id; - goto free_ctx; + dump_show(qm, cqc_curr, sizeof(*cqc_curr), "SOFT CQC"); } + up_read(&qm->qps_lock); - dump_show(qm, cqc, sizeof(*cqc), name); - -free_ctx: - hisi_qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma); return 0; } static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; - dma_addr_t xeqc_dma; + struct qm_aeqc aeqc; + struct qm_eqc eqc; size_t size; void *xeqc; int ret; @@ -233,23 +222,19 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name) if (!strcmp(name, "EQC")) { cmd = QM_MB_CMD_EQC; size = sizeof(struct qm_eqc); + xeqc = &eqc; } else { cmd = QM_MB_CMD_AEQC; size = sizeof(struct qm_aeqc); + xeqc = &aeqc; } - xeqc = hisi_qm_ctx_alloc(qm, size, &xeqc_dma); - if (IS_ERR(xeqc)) - return PTR_ERR(xeqc); - - ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1); + ret = qm_set_and_get_xqc(qm, cmd, xeqc, 0, 1); if (ret) - goto err_free_ctx; + return ret; dump_show(qm, xeqc, size, name); -err_free_ctx: - hisi_qm_ctx_free(qm, size, xeqc, &xeqc_dma); return ret; } diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index f3b55c044dd3..a1d0473f1931 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -46,7 +46,7 @@ #define QM_QC_PASID_ENABLE_SHIFT 7 #define QM_SQ_TYPE_MASK GENMASK(3, 0) -#define QM_SQ_TAIL_IDX(sqc) ((le16_to_cpu((sqc)->w11) >> 6) & 0x1) +#define 
QM_SQ_TAIL_IDX(sqc) ((le16_to_cpu((sqc).w11) >> 6) & 0x1) /* cqc shift */ #define QM_CQ_HOP_NUM_SHIFT 0 @@ -58,7 +58,7 @@ #define QM_CQE_PHASE(cqe) (le16_to_cpu((cqe)->w7) & 0x1) #define QM_QC_CQE_SIZE 4 -#define QM_CQ_TAIL_IDX(cqc) ((le16_to_cpu((cqc)->w11) >> 6) & 0x1) +#define QM_CQ_TAIL_IDX(cqc) ((le16_to_cpu((cqc).w11) >> 6) & 0x1) /* eqc shift */ #define QM_EQE_AEQE_SIZE (2UL << 12) @@ -252,19 +252,6 @@ #define QM_MK_SQC_DW3_V2(sqe_sz, sq_depth) \ ((((u32)sq_depth) - 1) | ((u32)ilog2(sqe_sz) << QM_SQ_SQE_SIZE_SHIFT)) -#define INIT_QC_COMMON(qc, base, pasid) do { \ - (qc)->head = 0; \ - (qc)->tail = 0; \ - (qc)->base_l = cpu_to_le32(lower_32_bits(base)); \ - (qc)->base_h = cpu_to_le32(upper_32_bits(base)); \ - (qc)->dw3 = 0; \ - (qc)->w8 = 0; \ - (qc)->rsvd0 = 0; \ - (qc)->pasid = cpu_to_le16(pasid); \ - (qc)->w11 = 0; \ - (qc)->rsvd1 = 0; \ -} while (0) - enum vft_type { SQC_VFT = 0, CQC_VFT, @@ -686,6 +673,59 @@ int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, } EXPORT_SYMBOL_GPL(hisi_qm_mb); +/* op 0: set xqc information to hardware, 1: get xqc information from hardware. */ +int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op) +{ + struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev)); + struct qm_mailbox mailbox; + dma_addr_t xqc_dma; + void *tmp_xqc; + size_t size; + int ret; + + switch (cmd) { + case QM_MB_CMD_SQC: + size = sizeof(struct qm_sqc); + tmp_xqc = qm->xqc_buf.sqc; + xqc_dma = qm->xqc_buf.sqc_dma; + break; + case QM_MB_CMD_CQC: + size = sizeof(struct qm_cqc); + tmp_xqc = qm->xqc_buf.cqc; + xqc_dma = qm->xqc_buf.cqc_dma; + break; + case QM_MB_CMD_EQC: + size = sizeof(struct qm_eqc); + tmp_xqc = qm->xqc_buf.eqc; + xqc_dma = qm->xqc_buf.eqc_dma; + break; + case QM_MB_CMD_AEQC: + size = sizeof(struct qm_aeqc); + tmp_xqc = qm->xqc_buf.aeqc; + xqc_dma = qm->xqc_buf.aeqc_dma; + break; + } + + /* Setting xqc will fail if master OOO is blocked. 
*/ + if (qm_check_dev_error(pf_qm)) { + dev_err(&qm->pdev->dev, "failed to send mailbox since qm is stop!\n"); + return -EIO; + } + + mutex_lock(&qm->mailbox_lock); + if (!op) + memcpy(tmp_xqc, xqc, size); + + qm_mb_pre_init(&mailbox, cmd, xqc_dma, qp_id, op); + ret = qm_mb_nolock(qm, &mailbox); + if (!ret && op) + memcpy(xqc, tmp_xqc, size); + + mutex_unlock(&qm->mailbox_lock); + + return ret; +} + static void qm_db_v1(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority) { u64 doorbell; @@ -1321,45 +1361,6 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number) return 0; } -void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size, - dma_addr_t *dma_addr) -{ - struct device *dev = &qm->pdev->dev; - void *ctx_addr; - - ctx_addr = kzalloc(ctx_size, GFP_KERNEL); - if (!ctx_addr) - return ERR_PTR(-ENOMEM); - - *dma_addr = dma_map_single(dev, ctx_addr, ctx_size, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, *dma_addr)) { - dev_err(dev, "DMA mapping error!\n"); - kfree(ctx_addr); - return ERR_PTR(-ENOMEM); - } - - return ctx_addr; -} - -void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size, - const void *ctx_addr, dma_addr_t *dma_addr) -{ - struct device *dev = &qm->pdev->dev; - - dma_unmap_single(dev, *dma_addr, ctx_size, DMA_FROM_DEVICE); - kfree(ctx_addr); -} - -static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) -{ - return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1); -} - -static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) -{ - return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1); -} - static void qm_hw_error_init_v1(struct hisi_qm *qm) { writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK); @@ -1952,84 +1953,51 @@ static void hisi_qm_release_qp(struct hisi_qp *qp) static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) { struct hisi_qm *qm = qp->qm; - struct device *dev = &qm->pdev->dev; enum qm_hw_ver ver = qm->ver; - struct qm_sqc *sqc; - 
dma_addr_t sqc_dma; - int ret; + struct qm_sqc sqc = {0}; - sqc = kzalloc(sizeof(struct qm_sqc), GFP_KERNEL); - if (!sqc) - return -ENOMEM; - - INIT_QC_COMMON(sqc, qp->sqe_dma, pasid); if (ver == QM_HW_V1) { - sqc->dw3 = cpu_to_le32(QM_MK_SQC_DW3_V1(0, 0, 0, qm->sqe_size)); - sqc->w8 = cpu_to_le16(qp->sq_depth - 1); + sqc.dw3 = cpu_to_le32(QM_MK_SQC_DW3_V1(0, 0, 0, qm->sqe_size)); + sqc.w8 = cpu_to_le16(qp->sq_depth - 1); } else { - sqc->dw3 = cpu_to_le32(QM_MK_SQC_DW3_V2(qm->sqe_size, qp->sq_depth)); - sqc->w8 = 0; /* rand_qc */ + sqc.dw3 = cpu_to_le32(QM_MK_SQC_DW3_V2(qm->sqe_size, qp->sq_depth)); + sqc.w8 = 0; /* rand_qc */ } - sqc->cq_num = cpu_to_le16(qp_id); - sqc->w13 = cpu_to_le16(QM_MK_SQC_W13(0, 1, qp->alg_type)); + sqc.w13 = cpu_to_le16(QM_MK_SQC_W13(0, 1, qp->alg_type)); + sqc.base_l = cpu_to_le32(lower_32_bits(qp->sqe_dma)); + sqc.base_h = cpu_to_le32(upper_32_bits(qp->sqe_dma)); + sqc.cq_num = cpu_to_le16(qp_id); + sqc.pasid = cpu_to_le16(pasid); if (ver >= QM_HW_V3 && qm->use_sva && !qp->is_in_kernel) - sqc->w11 = cpu_to_le16(QM_QC_PASID_ENABLE << - QM_QC_PASID_ENABLE_SHIFT); - - sqc_dma = dma_map_single(dev, sqc, sizeof(struct qm_sqc), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, sqc_dma)) { - kfree(sqc); - return -ENOMEM; - } + sqc.w11 = cpu_to_le16(QM_QC_PASID_ENABLE << + QM_QC_PASID_ENABLE_SHIFT); - ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0); - dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE); - kfree(sqc); - - return ret; + return qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 0); } static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) { struct hisi_qm *qm = qp->qm; - struct device *dev = &qm->pdev->dev; enum qm_hw_ver ver = qm->ver; - struct qm_cqc *cqc; - dma_addr_t cqc_dma; - int ret; - - cqc = kzalloc(sizeof(struct qm_cqc), GFP_KERNEL); - if (!cqc) - return -ENOMEM; + struct qm_cqc cqc = {0}; - INIT_QC_COMMON(cqc, qp->cqe_dma, pasid); if (ver == QM_HW_V1) { - cqc->dw3 = 
cpu_to_le32(QM_MK_CQC_DW3_V1(0, 0, 0, - QM_QC_CQE_SIZE)); - cqc->w8 = cpu_to_le16(qp->cq_depth - 1); + cqc.dw3 = cpu_to_le32(QM_MK_CQC_DW3_V1(0, 0, 0, QM_QC_CQE_SIZE)); + cqc.w8 = cpu_to_le16(qp->cq_depth - 1); } else { - cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V2(QM_QC_CQE_SIZE, qp->cq_depth)); - cqc->w8 = 0; /* rand_qc */ + cqc.dw3 = cpu_to_le32(QM_MK_CQC_DW3_V2(QM_QC_CQE_SIZE, qp->cq_depth)); + cqc.w8 = 0; /* rand_qc */ } - cqc->dw6 = cpu_to_le32(1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT); + cqc.dw6 = cpu_to_le32(1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT); + cqc.base_l = cpu_to_le32(lower_32_bits(qp->cqe_dma)); + cqc.base_h = cpu_to_le32(upper_32_bits(qp->cqe_dma)); + cqc.pasid = cpu_to_le16(pasid); if (ver >= QM_HW_V3 && qm->use_sva && !qp->is_in_kernel) - cqc->w11 = cpu_to_le16(QM_QC_PASID_ENABLE); - - cqc_dma = dma_map_single(dev, cqc, sizeof(struct qm_cqc), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, cqc_dma)) { - kfree(cqc); - return -ENOMEM; - } + cqc.w11 = cpu_to_le16(QM_QC_PASID_ENABLE); - ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0); - dma_unmap_single(dev, cqc_dma, sizeof(struct qm_cqc), DMA_TO_DEVICE); - kfree(cqc); - - return ret; + return qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 0); } static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid) @@ -2119,14 +2087,11 @@ static void qp_stop_fail_cb(struct hisi_qp *qp) */ static int qm_drain_qp(struct hisi_qp *qp) { - size_t size = sizeof(struct qm_sqc) + sizeof(struct qm_cqc); struct hisi_qm *qm = qp->qm; struct device *dev = &qm->pdev->dev; - struct qm_sqc *sqc; - struct qm_cqc *cqc; - dma_addr_t dma_addr; - int ret = 0, i = 0; - void *addr; + struct qm_sqc sqc; + struct qm_cqc cqc; + int ret, i = 0; /* No need to judge if master OOO is blocked. 
*/ if (qm_check_dev_error(qm)) @@ -2140,44 +2105,32 @@ static int qm_drain_qp(struct hisi_qp *qp) return ret; } - addr = hisi_qm_ctx_alloc(qm, size, &dma_addr); - if (IS_ERR(addr)) { - dev_err(dev, "Failed to alloc ctx for sqc and cqc!\n"); - return -ENOMEM; - } - while (++i) { - ret = qm_dump_sqc_raw(qm, dma_addr, qp->qp_id); + ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp->qp_id, 1); if (ret) { dev_err_ratelimited(dev, "Failed to dump sqc!\n"); - break; + return ret; } - sqc = addr; - ret = qm_dump_cqc_raw(qm, (dma_addr + sizeof(struct qm_sqc)), - qp->qp_id); + ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp->qp_id, 1); if (ret) { dev_err_ratelimited(dev, "Failed to dump cqc!\n"); - break; + return ret; } - cqc = addr + sizeof(struct qm_sqc); - if ((sqc->tail == cqc->tail) && + if ((sqc.tail == cqc.tail) && (QM_SQ_TAIL_IDX(sqc) == QM_CQ_TAIL_IDX(cqc))) break; if (i == MAX_WAIT_COUNTS) { dev_err(dev, "Fail to empty queue %u!\n", qp->qp_id); - ret = -EBUSY; - break; + return -EBUSY; } usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX); } - hisi_qm_ctx_free(qm, size, addr, &dma_addr); - - return ret; + return 0; } static int qm_stop_qp_nolock(struct hisi_qp *qp) @@ -2889,11 +2842,20 @@ static void hisi_qm_unint_work(struct hisi_qm *qm) destroy_workqueue(qm->wq); } +static void hisi_qm_free_rsv_buf(struct hisi_qm *qm) +{ + struct qm_dma *xqc_dma = &qm->xqc_buf.qcdma; + struct device *dev = &qm->pdev->dev; + + dma_free_coherent(dev, xqc_dma->size, xqc_dma->va, xqc_dma->dma); +} + static void hisi_qm_memory_uninit(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; hisi_qp_memory_uninit(qm, qm->qp_num); + hisi_qm_free_rsv_buf(qm); if (qm->qdma.va) { hisi_qm_cache_wb(qm); dma_free_coherent(dev, qm->qdma.size, @@ -3015,62 +2977,26 @@ static void qm_disable_eq_aeq_interrupts(struct hisi_qm *qm) static int qm_eq_ctx_cfg(struct hisi_qm *qm) { - struct device *dev = &qm->pdev->dev; - struct qm_eqc *eqc; - dma_addr_t eqc_dma; - int ret; - - eqc = 
kzalloc(sizeof(struct qm_eqc), GFP_KERNEL); - if (!eqc) - return -ENOMEM; + struct qm_eqc eqc = {0}; - eqc->base_l = cpu_to_le32(lower_32_bits(qm->eqe_dma)); - eqc->base_h = cpu_to_le32(upper_32_bits(qm->eqe_dma)); + eqc.base_l = cpu_to_le32(lower_32_bits(qm->eqe_dma)); + eqc.base_h = cpu_to_le32(upper_32_bits(qm->eqe_dma)); if (qm->ver == QM_HW_V1) - eqc->dw3 = cpu_to_le32(QM_EQE_AEQE_SIZE); - eqc->dw6 = cpu_to_le32(((u32)qm->eq_depth - 1) | (1 << QM_EQC_PHASE_SHIFT)); + eqc.dw3 = cpu_to_le32(QM_EQE_AEQE_SIZE); + eqc.dw6 = cpu_to_le32(((u32)qm->eq_depth - 1) | (1 << QM_EQC_PHASE_SHIFT)); - eqc_dma = dma_map_single(dev, eqc, sizeof(struct qm_eqc), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, eqc_dma)) { - kfree(eqc); - return -ENOMEM; - } - - ret = hisi_qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0); - dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE); - kfree(eqc); - - return ret; + return qm_set_and_get_xqc(qm, QM_MB_CMD_EQC, &eqc, 0, 0); } static int qm_aeq_ctx_cfg(struct hisi_qm *qm) { - struct device *dev = &qm->pdev->dev; - struct qm_aeqc *aeqc; - dma_addr_t aeqc_dma; - int ret; + struct qm_aeqc aeqc = {0}; - aeqc = kzalloc(sizeof(struct qm_aeqc), GFP_KERNEL); - if (!aeqc) - return -ENOMEM; - - aeqc->base_l = cpu_to_le32(lower_32_bits(qm->aeqe_dma)); - aeqc->base_h = cpu_to_le32(upper_32_bits(qm->aeqe_dma)); - aeqc->dw6 = cpu_to_le32(((u32)qm->aeq_depth - 1) | (1 << QM_EQC_PHASE_SHIFT)); + aeqc.base_l = cpu_to_le32(lower_32_bits(qm->aeqe_dma)); + aeqc.base_h = cpu_to_le32(upper_32_bits(qm->aeqe_dma)); + aeqc.dw6 = cpu_to_le32(((u32)qm->aeq_depth - 1) | (1 << QM_EQC_PHASE_SHIFT)); - aeqc_dma = dma_map_single(dev, aeqc, sizeof(struct qm_aeqc), - DMA_TO_DEVICE); - if (dma_mapping_error(dev, aeqc_dma)) { - kfree(aeqc); - return -ENOMEM; - } - - ret = hisi_qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0); - dma_unmap_single(dev, aeqc_dma, sizeof(struct qm_aeqc), DMA_TO_DEVICE); - kfree(aeqc); - - return ret; + return qm_set_and_get_xqc(qm, QM_MB_CMD_AEQC, 
&aeqc, 0, 0); } static int qm_eq_aeq_ctx_cfg(struct hisi_qm *qm) @@ -5296,6 +5222,36 @@ err_init_qp_mem: return ret; } +static int hisi_qm_alloc_rsv_buf(struct hisi_qm *qm) +{ + struct qm_rsv_buf *xqc_buf = &qm->xqc_buf; + struct qm_dma *xqc_dma = &xqc_buf->qcdma; + struct device *dev = &qm->pdev->dev; + size_t off = 0; + +#define QM_XQC_BUF_INIT(xqc_buf, type) do { \ + (xqc_buf)->type = ((xqc_buf)->qcdma.va + (off)); \ + (xqc_buf)->type##_dma = (xqc_buf)->qcdma.dma + (off); \ + off += QMC_ALIGN(sizeof(struct qm_##type)); \ +} while (0) + + xqc_dma->size = QMC_ALIGN(sizeof(struct qm_eqc)) + + QMC_ALIGN(sizeof(struct qm_aeqc)) + + QMC_ALIGN(sizeof(struct qm_sqc)) + + QMC_ALIGN(sizeof(struct qm_cqc)); + xqc_dma->va = dma_alloc_coherent(dev, xqc_dma->size, + &xqc_dma->dma, GFP_KERNEL); + if (!xqc_dma->va) + return -ENOMEM; + + QM_XQC_BUF_INIT(xqc_buf, eqc); + QM_XQC_BUF_INIT(xqc_buf, aeqc); + QM_XQC_BUF_INIT(xqc_buf, sqc); + QM_XQC_BUF_INIT(xqc_buf, cqc); + + return 0; +} + static int hisi_qm_memory_init(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; @@ -5337,13 +5293,19 @@ static int hisi_qm_memory_init(struct hisi_qm *qm) QM_INIT_BUF(qm, sqc, qm->qp_num); QM_INIT_BUF(qm, cqc, qm->qp_num); + ret = hisi_qm_alloc_rsv_buf(qm); + if (ret) + goto err_free_qdma; + ret = hisi_qp_alloc_memory(qm); if (ret) - goto err_alloc_qp_array; + goto err_free_reserve_buf; return 0; -err_alloc_qp_array: +err_free_reserve_buf: + hisi_qm_free_rsv_buf(qm); +err_free_qdma: dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma); err_destroy_idr: idr_destroy(&qm->qp_idr); diff --git a/drivers/crypto/hisilicon/qm_common.h b/drivers/crypto/hisilicon/qm_common.h index 8e36aa9c681b..7b0b15c83ec1 100644 --- a/drivers/crypto/hisilicon/qm_common.h +++ b/drivers/crypto/hisilicon/qm_common.h @@ -76,10 +76,7 @@ static const char * const qm_s[] = { "init", "start", "close", "stop", }; -void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size, - dma_addr_t *dma_addr); -void 
hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size, - const void *ctx_addr, dma_addr_t *dma_addr); +int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op); void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm); void hisi_qm_set_algqos_init(struct hisi_qm *qm); diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 34c64a02712c..44e0c44a2e20 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -292,6 +292,18 @@ struct qm_err_isolate { struct list_head qm_hw_errs; }; +struct qm_rsv_buf { + struct qm_sqc *sqc; + struct qm_cqc *cqc; + struct qm_eqc *eqc; + struct qm_aeqc *aeqc; + dma_addr_t sqc_dma; + dma_addr_t cqc_dma; + dma_addr_t eqc_dma; + dma_addr_t aeqc_dma; + struct qm_dma qcdma; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -324,6 +336,7 @@ struct hisi_qm { dma_addr_t cqc_dma; dma_addr_t eqe_dma; dma_addr_t aeqe_dma; + struct qm_rsv_buf xqc_buf; struct hisi_qm_status status; const struct hisi_qm_err_ini *err_ini; -- cgit v1.2.3 From 886ee55eabac0d46faf8bc0b22207ca2740847ba Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Oct 2023 10:15:46 +0200 Subject: locking/seqlock: Propagate 'const' pointers within read-only methods, remove forced type casts Currently __seqprop_ptr() is an inline function that must choose to either use 'const' or non-const seqcount related pointers - but this results in the undesirable loss of 'const' propagation, via a forced type cast. The easiest solution would be to turn the pointer wrappers into macros that pass through whatever type is passed to them - but the clever maze of seqlock API instantiation macros relies on the GCC CPP '##' macro extension, which isn't recursive, so inline functions must be used here. So create two wrapper variants instead: 'ptr' and 'const_ptr', and pick the right one for the codepaths that are const: read_seqcount_begin() and read_seqcount_retry().
This cleans up type handling and allows the removal of all type forcing. No change in functionality. Signed-off-by: Ingo Molnar Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Waiman Long Cc: Will Deacon Cc: Thomas Gleixner Cc: Paul E. McKenney --- include/linux/seqlock.h | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 4b8dcd3a0d93..80f21d2ca2aa 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -200,9 +200,15 @@ typedef struct seqcount_##lockname { \ } seqcount_##lockname##_t; \ \ static __always_inline seqcount_t * \ -__seqprop_##lockname##_ptr(const seqcount_##lockname##_t *s) \ +__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ - return (void *)&s->seqcount; /* drop const */ \ + return &s->seqcount; \ +} \ + \ +static __always_inline const seqcount_t * \ +__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ +{ \ + return &s->seqcount; \ } \ \ static __always_inline unsigned \ @@ -247,9 +253,14 @@ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ * __seqprop() for seqcount_t */ -static inline seqcount_t *__seqprop_ptr(const seqcount_t *s) +static inline seqcount_t *__seqprop_ptr(seqcount_t *s) +{ + return s; +} + +static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) { - return (void *)s; /* drop const */ + return s; } static inline unsigned __seqprop_sequence(const seqcount_t *s) @@ -302,6 +313,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) __seqprop_case((s), mutex, prop)) #define seqprop_ptr(s) __seqprop(s, ptr)(s) +#define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s) #define seqprop_sequence(s) __seqprop(s, sequence)(s) #define seqprop_preemptible(s) __seqprop(s, preemptible)(s) #define seqprop_assert(s) __seqprop(s, assert)(s) @@ -353,7 +365,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) */ #define 
read_seqcount_begin(s) \ ({ \ - seqcount_lockdep_reader_access(seqprop_ptr(s)); \ + seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \ raw_read_seqcount_begin(s); \ }) @@ -419,7 +431,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - do___read_seqcount_retry(seqprop_ptr(s), start) + do___read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -439,7 +451,7 @@ static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - do_read_seqcount_retry(seqprop_ptr(s), start) + do_read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { -- cgit v1.2.3 From 3f7f31fff2510272334f3d0374c432bdaa4f1536 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Thu, 12 Oct 2023 12:27:36 -0700 Subject: net/mlx5: Parallelize vhca event handling At present, the mlx5 driver has a general purpose event handler which not only handles vhca events but also many other events. This incurs a huge bottleneck because the event handler is implemented by a single-threaded workqueue and all events are forced to be handled in a serial manner even though the application tries to create multiple SFs simultaneously. Introduce a dedicated vhca event handler which manages SFs parallel creation.
Signed-off-by: Wei Zhang Reviewed-by: Moshe Shemesh Reviewed-by: Shay Drory Reviewed-by: Jacob Keller Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 5 -- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 3 +- .../ethernet/mellanox/mlx5/core/sf/vhca_event.c | 57 ++++++++++++++++++++-- include/linux/mlx5/driver.h | 1 + 4 files changed, 57 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 3ec892d51f57..d91ea53eb394 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -441,8 +441,3 @@ int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int ev return blocking_notifier_call_chain(&events->sw_nh, event, data); } - -void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work) -{ - queue_work(dev->priv.events->wq, work); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 19ffd1816474..d348a7f9511f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -159,6 +159,8 @@ enum mlx5_semaphore_space_address { #define MLX5_DEFAULT_PROF 2 #define MLX5_SF_PROF 3 +#define MLX5_NUM_FW_CMD_THREADS 8 +#define MLX5_DEV_MAX_WQS MLX5_NUM_FW_CMD_THREADS static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed, size_t item_size, size_t num_items, @@ -347,7 +349,6 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap #define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) -void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct mlx5_core_dev *dev = 
pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c index d908fba968f0..c6fd729de8b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c @@ -21,6 +21,15 @@ struct mlx5_vhca_event_work { struct mlx5_vhca_state_event event; }; +struct mlx5_vhca_event_handler { + struct workqueue_struct *wq; +}; + +struct mlx5_vhca_events { + struct mlx5_core_dev *dev; + struct mlx5_vhca_event_handler handler[MLX5_DEV_MAX_WQS]; +}; + int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, u32 *out, u32 outlen) { u32 in[MLX5_ST_SZ_DW(query_vhca_state_in)] = {}; @@ -99,6 +108,12 @@ static void mlx5_vhca_state_work_handler(struct work_struct *_work) kfree(work); } +static void +mlx5_vhca_events_work_enqueue(struct mlx5_core_dev *dev, int idx, struct work_struct *work) +{ + queue_work(dev->priv.vhca_events->handler[idx].wq, work); +} + static int mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, void *data) { @@ -106,6 +121,7 @@ mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, v mlx5_nb_cof(nb, struct mlx5_vhca_state_notifier, nb); struct mlx5_vhca_event_work *work; struct mlx5_eqe *eqe = data; + int wq_idx; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) @@ -113,7 +129,8 @@ mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, v INIT_WORK(&work->work, &mlx5_vhca_state_work_handler); work->notifier = notifier; work->event.function_id = be16_to_cpu(eqe->data.vhca_state.function_id); - mlx5_events_work_enqueue(notifier->dev, &work->work); + wq_idx = work->event.function_id % MLX5_DEV_MAX_WQS; + mlx5_vhca_events_work_enqueue(notifier->dev, wq_idx, &work->work); return NOTIFY_OK; } @@ -132,28 +149,62 @@ void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap) int mlx5_vhca_event_init(struct 
mlx5_core_dev *dev) { struct mlx5_vhca_state_notifier *notifier; + char wq_name[MLX5_CMD_WQ_MAX_NAME]; + struct mlx5_vhca_events *events; + int err, i; if (!mlx5_vhca_event_supported(dev)) return 0; - notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); - if (!notifier) + events = kzalloc(sizeof(*events), GFP_KERNEL); + if (!events) return -ENOMEM; + events->dev = dev; + dev->priv.vhca_events = events; + for (i = 0; i < MLX5_DEV_MAX_WQS; i++) { + snprintf(wq_name, MLX5_CMD_WQ_MAX_NAME, "mlx5_vhca_event%d", i); + events->handler[i].wq = create_singlethread_workqueue(wq_name); + if (!events->handler[i].wq) { + err = -ENOMEM; + goto err_create_wq; + } + } + + notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); + if (!notifier) { + err = -ENOMEM; + goto err_notifier; + } + dev->priv.vhca_state_notifier = notifier; notifier->dev = dev; BLOCKING_INIT_NOTIFIER_HEAD(&notifier->n_head); MLX5_NB_INIT(&notifier->nb, mlx5_vhca_state_change_notifier, VHCA_STATE_CHANGE); return 0; + +err_notifier: +err_create_wq: + for (--i; i >= 0; i--) + destroy_workqueue(events->handler[i].wq); + kfree(events); + return err; } void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev) { + struct mlx5_vhca_events *vhca_events; + int i; + if (!mlx5_vhca_event_supported(dev)) return; kfree(dev->priv.vhca_state_notifier); dev->priv.vhca_state_notifier = NULL; + vhca_events = dev->priv.vhca_events; + for (i = 0; i < MLX5_DEV_MAX_WQS; i++) + destroy_workqueue(vhca_events->handler[i].wq); + kvfree(vhca_events); } void mlx5_vhca_event_start(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 52e982bc0f50..7968c5ee85c4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -615,6 +615,7 @@ struct mlx5_priv { int adev_idx; int sw_vhca_id; struct mlx5_events *events; + struct mlx5_vhca_events *vhca_events; struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; -- cgit v1.2.3 From e534552c92a44690e48593f9567fe689545ded73 Mon Sep 17
00:00:00 2001 From: Shay Drory Date: Thu, 12 Oct 2023 12:27:39 -0700 Subject: net/mlx5: Refactor LAG peer device lookout bus logic to mlx5 devcom LAG peer device lookout bus logic required the usage of global lock, mlx5_intf_mutex. As part of the effort to remove this global lock, refactor LAG peer device lookout to use mlx5 devcom layer. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Reviewed-by: Jacob Keller Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 68 ---------------------- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 12 ++-- .../net/ethernet/mellanox/mlx5/core/lib/devcom.c | 14 +++++ .../net/ethernet/mellanox/mlx5/core/lib/devcom.h | 4 ++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 25 ++++++++ .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 1 - include/linux/mlx5/driver.h | 1 + 7 files changed, 52 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 1fc03480c2ff..6e3a8c22881f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -566,74 +566,6 @@ bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid); } -static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev) -{ - return (u32)((pci_domain_nr(dev->pdev->bus) << 16) | - (dev->pdev->bus->number << 8) | - PCI_SLOT(dev->pdev->devfn)); -} - -static int _next_phys_dev(struct mlx5_core_dev *mdev, - const struct mlx5_core_dev *curr) -{ - if (!mlx5_core_is_pf(mdev)) - return 0; - - if (mdev == curr) - return 0; - - if (!mlx5_same_hw_devs(mdev, (struct mlx5_core_dev *)curr) && - mlx5_gen_pci_id(mdev) != mlx5_gen_pci_id(curr)) - return 0; - - return 1; -} - -static void *pci_get_other_drvdata(struct device *this, struct device *other) -{ - if (this->driver != other->driver) - return NULL; - - 
return pci_get_drvdata(to_pci_dev(other)); -} - -static int next_phys_dev_lag(struct device *dev, const void *data) -{ - struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data; - - mdev = pci_get_other_drvdata(this->device, dev); - if (!mdev) - return 0; - - if (!mlx5_lag_is_supported(mdev)) - return 0; - - return _next_phys_dev(mdev, data); -} - -static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev, - int (*match)(struct device *dev, const void *data)) -{ - struct device *next; - - if (!mlx5_core_is_pf(dev)) - return NULL; - - next = bus_find_device(&pci_bus_type, NULL, dev, match); - if (!next) - return NULL; - - put_device(next); - return pci_get_drvdata(to_pci_dev(next)); -} - -/* Must be called with intf_mutex held */ -struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev) -{ - lockdep_assert_held(&mlx5_intf_mutex); - return mlx5_get_next_dev(dev, &next_phys_dev_lag); -} - void mlx5_dev_list_lock(void) { mutex_lock(&mlx5_intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index af3fac090b82..f0b57f97739f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1212,13 +1212,14 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, dev->priv.lag = NULL; } -/* Must be called with intf_mutex held */ +/* Must be called with HCA devcom component lock held */ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) { + struct mlx5_devcom_comp_dev *pos = NULL; struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; - tmp_dev = mlx5_get_next_phys_dev_lag(dev); + tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos); if (tmp_dev) ldev = mlx5_lag_dev(tmp_dev); @@ -1275,10 +1276,13 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) if (!mlx5_lag_is_supported(dev)) return; + if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp)) + return; + recheck: - 
mlx5_dev_list_lock(); + mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp); err = __mlx5_lag_dev_add_mdev(dev); - mlx5_dev_list_unlock(); + mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp); if (err) { msleep(100); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index 89ac3209277e..f4d5c300ddd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -387,3 +387,17 @@ void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom, *pos = tmp; return data; } + +void mlx5_devcom_comp_lock(struct mlx5_devcom_comp_dev *devcom) +{ + if (IS_ERR_OR_NULL(devcom)) + return; + down_write(&devcom->comp->sem); +} + +void mlx5_devcom_comp_unlock(struct mlx5_devcom_comp_dev *devcom) +{ + if (IS_ERR_OR_NULL(devcom)) + return; + up_write(&devcom->comp->sem); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index 8220d180e33c..f06529cc3c61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -9,6 +9,7 @@ enum mlx5_devcom_component { MLX5_DEVCOM_ESW_OFFLOADS, MLX5_DEVCOM_MPV, + MLX5_DEVCOM_HCA_PORTS, MLX5_DEVCOM_NUM_COMPONENTS, }; @@ -52,4 +53,7 @@ void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom, data; \ data = mlx5_devcom_get_next_peer_data_rcu(devcom, &pos)) +void mlx5_devcom_comp_lock(struct mlx5_devcom_comp_dev *devcom); +void mlx5_devcom_comp_unlock(struct mlx5_devcom_comp_dev *devcom); + #endif /* __LIB_MLX5_DEVCOM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 307ffe6300f8..a17152c1cbb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -73,6 +73,7 @@ #include "sf/sf.h" #include "mlx5_irq.h" #include 
"hwmon.h" +#include "lag/lag.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -952,6 +953,27 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev) mlx5_pci_disable_device(dev); } +static void mlx5_register_hca_devcom_comp(struct mlx5_core_dev *dev) +{ + /* This component is use to sync adding core_dev to lag_dev and to sync + * changes of mlx5_adev_devices between LAG layer and other layers. + */ + if (!mlx5_lag_is_supported(dev)) + return; + + dev->priv.hca_devcom_comp = + mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS, + mlx5_query_nic_system_image_guid(dev), + NULL, dev); + if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp)) + mlx5_core_err(dev, "Failed to register devcom HCA component\n"); +} + +static void mlx5_unregister_hca_devcom_comp(struct mlx5_core_dev *dev) +{ + mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp); +} + static int mlx5_init_once(struct mlx5_core_dev *dev) { int err; @@ -960,6 +982,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) if (IS_ERR(dev->priv.devc)) mlx5_core_warn(dev, "failed to register devcom device %ld\n", PTR_ERR(dev->priv.devc)); + mlx5_register_hca_devcom_comp(dev); err = mlx5_query_board_id(dev); if (err) { @@ -1094,6 +1117,7 @@ err_eq_cleanup: err_irq_cleanup: mlx5_irq_table_cleanup(dev); err_devcom: + mlx5_unregister_hca_devcom_comp(dev); mlx5_devcom_unregister_device(dev->priv.devc); return err; @@ -1123,6 +1147,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); mlx5_irq_table_cleanup(dev); + mlx5_unregister_hca_devcom_comp(dev); mlx5_devcom_unregister_device(dev->priv.devc); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index d348a7f9511f..f191a90f337b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -266,7 +266,6 @@ int mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev); bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev); -struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7968c5ee85c4..f60cdc9bd40f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -624,6 +624,7 @@ struct mlx5_priv { struct mlx5_lag *lag; u32 flags; struct mlx5_devcom_dev *devc; + struct mlx5_devcom_comp_dev *hca_devcom_comp; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; struct mlx5_fc_stats fc_stats; -- cgit v1.2.3 From 0d2d6bc7e74fee586f587d33484b797bb78f334c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Thu, 12 Oct 2023 12:27:41 -0700 Subject: net/mlx5: Remove unused declaration Commit 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path") declared mlx5e_ipsec_inverse_table_init() but never implemented it. Commit f52f2faee581 ("net/mlx5e: Introduce flow steering API") declared mlx5e_fs_set_tc() but never implemented it. Commit f2f3df550139 ("net/mlx5: EQ, Privatize eq_table and friends") declared mlx5_eq_comp_cpumask() but never implemented it. Commit cac1eb2cf2e3 ("net/mlx5: Lag, properly lock eswitch if needed") removed mlx5_lag_update() but not its declaration. Commit 35ba005d820b ("net/mlx5: DR, Set flex parser for TNL_MPLS dynamically") removed mlx5dr_ste_build_tnl_mpls() but not its declaration. Commit e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") declared but never implemented mlx5_alloc_cmd_mailbox_chain() and mlx5_free_cmd_mailbox_chain(). 
Commit 0cf53c124756 ("net/mlx5: FWPage, Use async events chain") removed mlx5_core_req_pages_handler() but not its declaration. Commit 938fe83c8dcb ("net/mlx5_core: New device capabilities handling") removed mlx5_query_odp_caps() but not its declaration. Commit f6a8a19bb11b ("RDMA/netdev: Hoist alloc_netdev_mqs out of the driver") removed mlx5_rdma_netdev_alloc() but not its declaration. Signed-off-by: Yue Haibing Reviewed-by: Leon Romanovsky Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/fs.h | 1 - .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 -- .../net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 4 ---- include/linux/mlx5/driver.h | 14 -------------- 6 files changed, 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index e5a44b0b9616..4d6225e0eec7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -150,7 +150,6 @@ struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, struct dentry *dfs_root); void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs); struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs); -void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc); struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs); struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs); struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 9ee014a8ad24..2ed99772f168 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -54,7 +54,6 @@ struct mlx5e_accel_tx_ipsec_state { #ifdef CONFIG_MLX5_EN_IPSEC -void mlx5e_ipsec_inverse_table_init(void); void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_offload *xo); void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 69a75459775d..4b7f7131c560 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -85,7 +85,6 @@ void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); void mlx5_cq_tasklet_cb(struct tasklet_struct *t); -struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 13af3ae2cce5..6b14e347d914 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -311,8 +311,6 @@ static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev) return ret; } -void mlx5_lag_update(struct mlx5_core_dev *dev); - enum { MLX5_NIC_IFC_FULL = 0, MLX5_NIC_IFC_DISABLED = 1, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 55dc7383477c..81eff6c410ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -436,10 +436,6 @@ void mlx5dr_ste_build_mpls(struct mlx5dr_ste_ctx 
*ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx); void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f60cdc9bd40f..d2b8d4a74a30 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1043,10 +1043,6 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); -struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, - gfp_t flags, int npages); -void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, - struct mlx5_cmd_mailbox *head); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, int inlen); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey); @@ -1060,8 +1056,6 @@ void mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev); void mlx5_pages_debugfs_cleanup(struct mlx5_core_dev *dev); -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages, bool ec_function); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); @@ -1101,8 +1095,6 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); __be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); -int 
mlx5_query_odp_caps(struct mlx5_core_dev *dev, - struct mlx5_odp_caps *odp_caps); int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); @@ -1204,12 +1196,6 @@ int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev, void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev, int vf_id, struct notifier_block *nb); -#ifdef CONFIG_MLX5_CORE_IPOIB -struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, - struct ib_device *ibdev, - const char *name, - void (*setup)(struct net_device *)); -#endif /* CONFIG_MLX5_CORE_IPOIB */ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, struct ib_device *device, struct rdma_netdev_alloc_params *params); -- cgit v1.2.3 From 57f728d59f005dffdbb52a03531e480a71599bc5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 31 Jul 2023 22:08:17 -0700 Subject: cpumask: kernel-doc cleanups and additions Clean up some punctuation and abbreviations. Add kernel-doc notation for one function and function return value for 39 functions. cpumask.h: Fix some punctuation (plural vs. possessive). Fix some abbreviations (ie. -> i.e., id -> ID). Fix 35 warnings like this: include/linux/cpumask.h:161: warning: No description found for return value of 'cpumask_first' cpumask.c: Add Return: value for 4 functions. Add kernel-doc for cpumask_any_distribute(). Signed-off-by: Randy Dunlap Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/cpumask.h | 113 +++++++++++++++++++++++++++++------------------- lib/cpumask.c | 17 +++++--- 2 files changed, 81 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f10fb87d49db..cfb545841a2c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -4,7 +4,7 @@ /* * Cpumasks provide a bitmap suitable for representing the - * set of CPU's in a system, one bit position per CPU number.
In general, + * set of CPUs in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ #include @@ -97,7 +97,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * If !CONFIG_HOTPLUG_CPU, present == possible, and active == online. * - * The cpu_possible_mask is fixed at boot time, as the set of CPU id's + * The cpu_possible_mask is fixed at boot time, as the set of CPU IDs * that it is possible might ever be plugged in at anytime during the * life of that system boot. The cpu_present_mask is dynamic(*), * representing which CPUs are currently plugged in. And @@ -112,7 +112,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * hotplug, it's a copy of cpu_possible_mask, hence fixed at boot. * * Subtleties: - * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode + * 1) UP ARCHes (NR_CPUS == 1, CONFIG_SMP not defined) hardcode * assumption that their single CPU is online. The UP * cpu_{online,possible,present}_masks are placebos. Changing them * will have no useful affect on the following num_*_cpus() @@ -155,7 +155,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) * cpumask_first - get the first cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { @@ -166,7 +166,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) * cpumask_first_zero - get the first unset cpu in a cpumask * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if all cpus are set. + * Return: >= nr_cpu_ids if all cpus are set. */ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { @@ -178,7 +178,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) * @srcp1: the first input * @srcp2: the second input * - * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). 
+ * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -190,7 +190,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer * - * Returns >= nr_cpumask_bits if no CPUs set. + * Return: >= nr_cpumask_bits if no CPUs set. */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { @@ -199,10 +199,10 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) /** * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set. + * Return: >= nr_cpu_ids if no further cpus set. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) @@ -215,10 +215,10 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) /** * cpumask_next_zero - get the next unset cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @srcp: the cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus unset. + * Return: >= nr_cpu_ids if no further cpus unset. */ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { @@ -254,11 +254,11 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp); /** * cpumask_next_and - get the next cpu in *src1p & *src2p - * @n: the cpu prior to the place to search (ie. return will be > @n) + * @n: the cpu prior to the place to search (i.e. return will be > @n) * @src1p: the first cpumask pointer * @src2p: the second cpumask pointer * - * Returns >= nr_cpu_ids if no further cpus set in both. 
+ * Return: >= nr_cpu_ids if no further cpus set in both. */ static inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, @@ -373,7 +373,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * @cpu: the cpu to ignore. * * Often used to find any cpu but smp_processor_id() in a mask. - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ static inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) @@ -388,11 +388,11 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) } /** - * cpumask_nth - get the first cpu in a cpumask + * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { @@ -400,12 +400,12 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s } /** - * cpumask_nth_and - get the first cpu in 2 cpumasks + * cpumask_nth_and - get the Nth cpu in 2 cpumasks * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, @@ -416,12 +416,12 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, } /** - * cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd. + * cpumask_nth_andnot - get the Nth cpu set in 1st cpumask, and clear in 2nd. 
* @srcp1: the cpumask pointer * @srcp2: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -436,9 +436,9 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, * @srcp1: the cpumask pointer * @srcp2: the cpumask pointer * @srcp3: the cpumask pointer - * @cpu: the N'th cpu to find, starting from 0 + * @cpu: the Nth cpu to find, starting from 0 * - * Returns >= nr_cpu_ids if such cpu doesn't exist. + * Return: >= nr_cpu_ids if such cpu doesn't exist. */ static __always_inline unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1, @@ -497,7 +497,7 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in @cpumask, else returns false + * Return: true if @cpu is set in @cpumask, else returns false */ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { @@ -509,9 +509,9 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_set_bit wrapper for cpumasks. 
+ * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { @@ -523,9 +523,9 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer * - * Returns true if @cpu is set in old bitmap of @cpumask, else returns false - * * test_and_clear_bit wrapper for cpumasks. + * + * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { @@ -560,7 +560,7 @@ static inline void cpumask_clear(struct cpumask *dstp) * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, @@ -603,7 +603,7 @@ static inline void cpumask_xor(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input * - * If *@dstp is empty, returns false, else returns true + * Return: false if *@dstp is empty, else returns true */ static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, @@ -617,6 +617,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp, * cpumask_equal - *src1p == *src2p * @src1p: the first input * @src2p: the second input + * + * Return: true if the cpumasks are equal, false if not */ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) @@ -630,6 +632,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * @src3p: the third input + * + * Return: true if first cpumask ORed with second cpumask == third cpumask, + * otherwise false */ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, @@ -643,6 +648,9 @@ 
static inline bool cpumask_or_equal(const struct cpumask *src1p, * cpumask_intersects - (*src1p & *src2p) != 0 * @src1p: the first input * @src2p: the second input + * + * Return: true if first cpumask ANDed with second cpumask is non-empty, + * otherwise false */ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) @@ -656,7 +664,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input * - * Returns true if *@src1p is a subset of *@src2p, else returns false + * Return: true if *@src1p is a subset of *@src2p, else returns false */ static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) @@ -668,6 +676,8 @@ static inline bool cpumask_subset(const struct cpumask *src1p, /** * cpumask_empty - *srcp == 0 * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. + * + * Return: true if srcp is empty (has no bits set), else false */ static inline bool cpumask_empty(const struct cpumask *srcp) { @@ -677,6 +687,8 @@ static inline bool cpumask_empty(const struct cpumask *srcp) /** * cpumask_full - *srcp == 0xFFFFFFFF... * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. + * + * Return: true if srcp is full (has all bits set), else false */ static inline bool cpumask_full(const struct cpumask *srcp) { @@ -686,6 +698,8 @@ static inline bool cpumask_full(const struct cpumask *srcp) /** * cpumask_weight - Count of bits in *srcp * @srcp: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in *srcp */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { @@ -696,6 +710,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) * cpumask_weight_and - Count of bits in (*srcp1 & *srcp2) * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. 
+ * + * Return: count of bits set in both *srcp1 and *srcp2 */ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) @@ -744,7 +760,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * cpumask_any - pick a "random" cpu from *srcp * @srcp: the input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any(srcp) cpumask_first(srcp) @@ -753,7 +769,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @mask1: the first input cpumask * @mask2: the second input cpumask * - * Returns >= nr_cpu_ids if no cpus set. + * Return: >= nr_cpu_ids if no cpus set. */ #define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) @@ -769,7 +785,7 @@ static inline void cpumask_copy(struct cpumask *dstp, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) @@ -783,7 +799,7 @@ static inline int cpumask_parse_user(const char __user *buf, int len, * @len: the length of the buffer * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) @@ -797,7 +813,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. */ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) { @@ -809,7 +825,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) * @buf: the buffer to extract from * @dstp: the cpumask to set. * - * Returns -errno, or 0 for success. + * Return: -errno, or 0 for success. 
*/ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { @@ -817,7 +833,9 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) } /** - * cpumask_size - size to allocate for a 'struct cpumask' in bytes + * cpumask_size - calculate size to allocate for a 'struct cpumask' in bytes + * + * Return: size to allocate for a &struct cpumask in bytes */ static inline unsigned int cpumask_size(void) { @@ -831,7 +849,7 @@ static inline unsigned int cpumask_size(void) * little more difficult, we typedef cpumask_var_t to an array or a * pointer: doing &mask on an array is a noop, so it still works. * - * ie. + * i.e. * cpumask_var_t tmpmask; * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) * return -ENOMEM; @@ -887,6 +905,8 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) * a nop returning a constant 1 (in ). * * See alloc_cpumask_var_node. + * + * Return: %true if allocation succeeded, %false if not */ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) @@ -1025,7 +1045,7 @@ set_cpu_dying(unsigned int cpu, bool dying) } /** - * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap * * There are a few places where cpumask_var_t isn't appropriate and @@ -1068,6 +1088,8 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) * interface gives only a momentary snapshot and is not protected against * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held * region. + * + * Return: momentary snapshot of the number of online CPUs */ static __always_inline unsigned int num_online_cpus(void) { @@ -1160,7 +1182,7 @@ static inline bool cpu_dying(unsigned int cpu) * @mask: the cpumask to copy * @buf: the buffer to copy into * - * Returns the length of the (null-terminated) @buf string, zero if + * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. 
*/ static inline ssize_t @@ -1183,7 +1205,7 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * cpumask; Typically used by bin_attribute to export cpumask bitmask * ABI. * - * Returns the length of how many bytes have been copied, excluding + * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ static inline ssize_t @@ -1204,6 +1226,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, * * Everything is same with the above cpumap_print_bitmask_to_buf() * except the print format. + * + * Return: the length of how many bytes have been copied, excluding + * terminating '\0'. */ static inline ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, diff --git a/lib/cpumask.c b/lib/cpumask.c index a7fd02b5ae26..19277c6d551f 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -14,7 +14,7 @@ * @start: the start point of the iteration * @wrap: assume @n crossing @start terminates the iteration * - * Returns >= nr_cpu_ids on completion + * Return: >= nr_cpu_ids on completion * * Note: the @wrap argument is required for the start condition when * we cannot assume @start is set in @mask. @@ -48,8 +48,9 @@ EXPORT_SYMBOL(cpumask_next_wrap); * @node: memory node from which to allocate or %NUMA_NO_NODE * * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is - * a nop returning a constant 1 (in ) - * Returns TRUE if memory allocation succeeded, FALSE otherwise. + * a nop returning a constant 1 (in ). + * + * Return: TRUE if memory allocation succeeded, FALSE otherwise. * * In addition, mask will be NULL if this fails. 
Note that gcc is * usually smart enough to know that mask can never be NULL if @@ -115,7 +116,7 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) * @i: index number * @node: local numa_node * - * Returns online CPU according to a numa aware policy; local cpus are returned + * Return: online CPU according to a numa aware policy; local cpus are returned * first, followed by non-local ones, then it wraps around. * * For those who wants to enumerate all CPUs based on their NUMA distances, @@ -165,7 +166,7 @@ static DEFINE_PER_CPU(int, distribute_cpu_mask_prev); * Iterated calls using the same srcp1 and srcp2 will be distributed within * their intersection. * - * Returns >= nr_cpu_ids if the intersection is empty. + * Return: >= nr_cpu_ids if the intersection is empty. */ unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p) @@ -184,6 +185,12 @@ unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, } EXPORT_SYMBOL(cpumask_any_and_distribute); +/** + * cpumask_any_distribute - Return an arbitrary cpu from srcp + * @srcp: &cpumask for selection + * + * Return: >= nr_cpu_ids if the intersection is empty. + */ unsigned int cpumask_any_distribute(const struct cpumask *srcp) { unsigned int next, prev; -- cgit v1.2.3 From 7733aa893847f021c674d0d30b723d892109369d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Aug 2023 19:20:54 +0300 Subject: bitmap: Remove dead code, i.e. bitmap_copy_le() Besides the fact that it's not used anywhere, it should be implemented differently, i.e. via helpers from linux/byteorder/generic.h. Yet the helpers themselves need to be introduced first. Also note, the function lacks test cases; they must be provided. Hence, drop the current dead code for good.
Signed-off-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bitmap.h | 5 ----- lib/bitmap.c | 23 ----------------------- 2 files changed, 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 03644237e1ef..1516ff979315 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -220,11 +220,6 @@ int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -#ifdef __BIG_ENDIAN -void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); -#else -#define bitmap_copy_le bitmap_copy -#endif int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); diff --git a/lib/bitmap.c b/lib/bitmap.c index 24284caadbcc..935e0f96e785 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1360,29 +1360,6 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) } EXPORT_SYMBOL(bitmap_allocate_region); -/** - * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. - * @dst: destination buffer - * @src: bitmap to copy - * @nbits: number of bits in the bitmap - * - * Require nbits % BITS_PER_LONG == 0. 
- */ -#ifdef __BIG_ENDIAN -void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) -{ - unsigned int i; - - for (i = 0; i < nbits/BITS_PER_LONG; i++) { - if (BITS_PER_LONG == 64) - dst[i] = cpu_to_le64(src[i]); - else - dst[i] = cpu_to_le32(src[i]); - } -} -EXPORT_SYMBOL(bitmap_copy_le); -#endif - unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags) { return kmalloc_array(BITS_TO_LONGS(nbits), sizeof(unsigned long), -- cgit v1.2.3 From aae06fc1b5a2e4b52f8504a1f12f9b8b98e80641 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 7 Oct 2023 16:35:10 -0700 Subject: lib/bitmap: split-out string-related operations to a separate files lib/bitmap.c and corresponding include/linux/bitmap.h are intended to hold functions related to operations on bitmaps, like bitmap_shift or bitmap_set. Historically, some string-related operations like bitmap_parse also reside in lib/bitmap.c. Now that the subsystem has evolved, string-related bitmap operations have become a significant part of the file. Because they are quite different from the other bitmap functions by nature, it's worth splitting them into separate source/header files.
CC: Andrew Morton CC: Andy Shevchenko CC: Rasmus Villemoes Signed-off-by: Yury Norov --- MAINTAINERS | 2 + include/linux/bitmap-str.h | 16 ++ include/linux/bitmap.h | 18 +- lib/Makefile | 2 +- lib/bitmap-str.c | 510 ++++++++++++++++++++++++++++++++++++++++++++ lib/bitmap.c | 512 --------------------------------------------- 6 files changed, 530 insertions(+), 530 deletions(-) create mode 100644 include/linux/bitmap-str.h create mode 100644 lib/bitmap-str.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 6c4cce45a09d..561def8239f9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3506,12 +3506,14 @@ R: Andy Shevchenko R: Rasmus Villemoes S: Maintained F: include/linux/bitfield.h +F: include/linux/bitmap-str.h F: include/linux/bitmap.h F: include/linux/bits.h F: include/linux/cpumask.h F: include/linux/find.h F: include/linux/nodemask.h F: include/vdso/bits.h +F: lib/bitmap-str.c F: lib/bitmap.c F: lib/cpumask.c F: lib/cpumask_kunit.c diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h new file mode 100644 index 000000000000..17caeca94cab --- /dev/null +++ b/include/linux/bitmap-str.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_BITMAP_STR_H +#define __LINUX_BITMAP_STR_H + +int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); +int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); +extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count); +int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); +int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, + unsigned long *dst, int nbits); + +#endif /* __LINUX_BITMAP_STR_H 
*/ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 1516ff979315..1cca950a54ae 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -10,6 +10,7 @@ #include #include #include +#include struct device; @@ -200,14 +201,6 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -int bitmap_parse(const char *buf, unsigned int buflen, - unsigned long *dst, int nbits); -int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); -int bitmap_parselist(const char *buf, unsigned long *maskp, - int nmaskbits); -int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, - unsigned long *dst, int nbits); void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, unsigned int nbits); int bitmap_bitremap(int oldbit, @@ -220,15 +213,6 @@ int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -int bitmap_print_to_pagebuf(bool list, char *buf, - const unsigned long *maskp, int nmaskbits); - -extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); - -extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); - #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) diff --git a/lib/Makefile b/lib/Makefile index 740109b6e2c8..9e8f9f6dd3b2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -48,7 +48,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o rhashtable.o base64.o \ once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \ - 
generic-radix-tree.o + generic-radix-tree.o bitmap-str.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o diff --git a/lib/bitmap-str.c b/lib/bitmap-str.c new file mode 100644 index 000000000000..be745209507a --- /dev/null +++ b/lib/bitmap-str.c @@ -0,0 +1,510 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kstrtox.h" + +/** + * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap + * + * @ubuf: pointer to user buffer containing string. + * @ulen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + */ +int bitmap_parse_user(const char __user *ubuf, + unsigned int ulen, unsigned long *maskp, + int nmaskbits) +{ + char *buf; + int ret; + + buf = memdup_user_nul(ubuf, ulen); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + ret = bitmap_parse(buf, UINT_MAX, maskp, nmaskbits); + + kfree(buf); + return ret; +} +EXPORT_SYMBOL(bitmap_parse_user); + +/** + * bitmap_print_to_pagebuf - convert bitmap to list or hex format ASCII string + * @list: indicates whether the bitmap must be list + * @buf: page aligned buffer into which string is placed + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * + * Output format is a comma-separated list of decimal numbers and + * ranges if list is specified or hex digits grouped into comma-separated + * sets of 8 digits/set. Returns the number of characters written to buf. + * + * It is assumed that @buf is a pointer into a PAGE_SIZE, page-aligned + * area and that sufficient storage remains at @buf to accommodate the + * bitmap_print_to_pagebuf() output. Returns the number of characters + * actually printed to @buf, excluding terminating '\0'. 
+ */ +int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, + int nmaskbits) +{ + ptrdiff_t len = PAGE_SIZE - offset_in_page(buf); + + return list ? scnprintf(buf, len, "%*pbl\n", nmaskbits, maskp) : + scnprintf(buf, len, "%*pb\n", nmaskbits, maskp); +} +EXPORT_SYMBOL(bitmap_print_to_pagebuf); + +/** + * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string + * @list: indicates whether the bitmap must be list + * true: print in decimal list format + * false: print in hexadecimal bitmask format + * @buf: buffer into which string is placed + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * @off: in the string from which we are copying, We copy to @buf + * @count: the maximum number of bytes to print + */ +static int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count) +{ + const char *fmt = list ? "%*pbl\n" : "%*pb\n"; + ssize_t size; + void *data; + + data = kasprintf(GFP_KERNEL, fmt, nmaskbits, maskp); + if (!data) + return -ENOMEM; + + size = memory_read_from_buffer(buf, count, &off, data, strlen(data) + 1); + kfree(data); + + return size; +} + +/** + * bitmap_print_bitmask_to_buf - convert bitmap to hex bitmask format ASCII string + * @buf: buffer into which string is placed + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * @off: in the string from which we are copying, We copy to @buf + * @count: the maximum number of bytes to print + * + * The bitmap_print_to_pagebuf() is used indirectly via its cpumap wrapper + * cpumap_print_to_pagebuf() or directly by drivers to export hexadecimal + * bitmask and decimal list to userspace by sysfs ABI. + * Drivers might be using a normal attribute for this kind of ABIs. A + * normal attribute typically has show entry as below:: + * + * static ssize_t example_attribute_show(struct device *dev, + * struct device_attribute *attr, char *buf) + * { + * ... 
+ * return bitmap_print_to_pagebuf(true, buf, &mask, nr_trig_max); + * } + * + * show entry of attribute has no offset and count parameters and this + * means the file is limited to one page only. + * bitmap_print_to_pagebuf() API works terribly well for this kind of + * normal attribute with buf parameter and without offset, count:: + * + * bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, + * int nmaskbits) + * { + * } + * + * The problem is once we have a large bitmap, we have a chance to get a + * bitmask or list more than one page. Especially for list, it could be + * as complex as 0,3,5,7,9,... We have no simple way to know it exact size. + * It turns out bin_attribute is a way to break this limit. bin_attribute + * has show entry as below:: + * + * static ssize_t + * example_bin_attribute_show(struct file *filp, struct kobject *kobj, + * struct bin_attribute *attr, char *buf, + * loff_t offset, size_t count) + * { + * ... + * } + * + * With the new offset and count parameters, this makes sysfs ABI be able + * to support file size more than one page. For example, offset could be + * >= 4096. + * bitmap_print_bitmask_to_buf(), bitmap_print_list_to_buf() wit their + * cpumap wrapper cpumap_print_bitmask_to_buf(), cpumap_print_list_to_buf() + * make those drivers be able to support large bitmask and list after they + * move to use bin_attribute. In result, we have to pass the corresponding + * parameters such as off, count from bin_attribute show entry to this API. + * + * The role of cpumap_print_bitmask_to_buf() and cpumap_print_list_to_buf() + * is similar with cpumap_print_to_pagebuf(), the difference is that + * bitmap_print_to_pagebuf() mainly serves sysfs attribute with the assumption + * the destination buffer is exactly one page and won't be more than one page. 
+ * cpumap_print_bitmask_to_buf() and cpumap_print_list_to_buf(), on the other + * hand, mainly serves bin_attribute which doesn't work with exact one page, + * and it can break the size limit of converted decimal list and hexadecimal + * bitmask. + * + * WARNING! + * + * This function is not a replacement for sprintf() or bitmap_print_to_pagebuf(). + * It is intended to workaround sysfs limitations discussed above and should be + * used carefully in general case for the following reasons: + * + * - Time complexity is O(nbits^2/count), comparing to O(nbits) for snprintf(). + * - Memory complexity is O(nbits), comparing to O(1) for snprintf(). + * - @off and @count are NOT offset and number of bits to print. + * - If printing part of bitmap as list, the resulting string is not a correct + * list representation of bitmap. Particularly, some bits within or out of + * related interval may be erroneously set or unset. The format of the string + * may be broken, so bitmap_parselist-like parser may fail parsing it. + * - If printing the whole bitmap as list by parts, user must ensure the order + * of calls of the function such that the offset is incremented linearly. + * - If printing the whole bitmap as list by parts, user must keep bitmap + * unchanged between the very first and very last call. Otherwise concatenated + * result may be incorrect, and format may be broken. 
+ * + * Returns the number of characters actually printed to @buf + */ +int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count) +{ + return bitmap_print_to_buf(false, buf, maskp, nmaskbits, off, count); +} +EXPORT_SYMBOL(bitmap_print_bitmask_to_buf); + +/** + * bitmap_print_list_to_buf - convert bitmap to decimal list format ASCII string + * @buf: buffer into which string is placed + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * @off: in the string from which we are copying, We copy to @buf + * @count: the maximum number of bytes to print + * + * Everything is same with the above bitmap_print_bitmask_to_buf() except + * the print format. + */ +int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, + int nmaskbits, loff_t off, size_t count) +{ + return bitmap_print_to_buf(true, buf, maskp, nmaskbits, off, count); +} +EXPORT_SYMBOL(bitmap_print_list_to_buf); + +/* + * Region 9-38:4/10 describes the following bitmap structure: + * 0 9 12 18 38 N + * .........****......****......****.................. 
+ * ^ ^ ^ ^ ^ + * start off group_len end nbits + */ +struct region { + unsigned int start; + unsigned int off; + unsigned int group_len; + unsigned int end; + unsigned int nbits; +}; + +static void bitmap_set_region(const struct region *r, unsigned long *bitmap) +{ + unsigned int start; + + for (start = r->start; start <= r->end; start += r->group_len) + bitmap_set(bitmap, start, min(r->end - start + 1, r->off)); +} + +static int bitmap_check_region(const struct region *r) +{ + if (r->start > r->end || r->group_len == 0 || r->off > r->group_len) + return -EINVAL; + + if (r->end >= r->nbits) + return -ERANGE; + + return 0; +} + +static const char *bitmap_getnum(const char *str, unsigned int *num, + unsigned int lastbit) +{ + unsigned long long n; + unsigned int len; + + if (str[0] == 'N') { + *num = lastbit; + return str + 1; + } + + len = _parse_integer(str, 10, &n); + if (!len) + return ERR_PTR(-EINVAL); + if (len & KSTRTOX_OVERFLOW || n != (unsigned int)n) + return ERR_PTR(-EOVERFLOW); + + *num = n; + return str + len; +} + +static inline bool end_of_str(char c) +{ + return c == '\0' || c == '\n'; +} + +static inline bool __end_of_region(char c) +{ + return isspace(c) || c == ','; +} + +static inline bool end_of_region(char c) +{ + return __end_of_region(c) || end_of_str(c); +} + +/* + * The format allows commas and whitespaces at the beginning + * of the region. + */ +static const char *bitmap_find_region(const char *str) +{ + while (__end_of_region(*str)) + str++; + + return end_of_str(*str) ? 
NULL : str; +} + +static const char *bitmap_find_region_reverse(const char *start, const char *end) +{ + while (start <= end && __end_of_region(*end)) + end--; + + return end; +} + +static const char *bitmap_parse_region(const char *str, struct region *r) +{ + unsigned int lastbit = r->nbits - 1; + + if (!strncasecmp(str, "all", 3)) { + r->start = 0; + r->end = lastbit; + str += 3; + + goto check_pattern; + } + + str = bitmap_getnum(str, &r->start, lastbit); + if (IS_ERR(str)) + return str; + + if (end_of_region(*str)) + goto no_end; + + if (*str != '-') + return ERR_PTR(-EINVAL); + + str = bitmap_getnum(str + 1, &r->end, lastbit); + if (IS_ERR(str)) + return str; + +check_pattern: + if (end_of_region(*str)) + goto no_pattern; + + if (*str != ':') + return ERR_PTR(-EINVAL); + + str = bitmap_getnum(str + 1, &r->off, lastbit); + if (IS_ERR(str)) + return str; + + if (*str != '/') + return ERR_PTR(-EINVAL); + + return bitmap_getnum(str + 1, &r->group_len, lastbit); + +no_end: + r->end = r->start; +no_pattern: + r->off = r->end + 1; + r->group_len = r->end + 1; + + return end_of_str(*str) ? NULL : str; +} + +/** + * bitmap_parselist - convert list format ASCII string to bitmap + * @buf: read user string from this buffer; must be terminated + * with a \0 or \n. + * @maskp: write resulting mask here + * @nmaskbits: number of bits in mask to be written + * + * Input format is a comma-separated list of decimal numbers and + * ranges. Consecutively set bits are shown as two hyphen-separated + * decimal numbers, the smallest and largest bit numbers set in + * the range. + * Optionally each range can be postfixed to denote that only parts of it + * should be set. The range will divided to groups of specific size. + * From each group will be used only defined amount of bits. 
+ * Syntax: range:used_size/group_size + * Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769 + * The value 'N' can be used as a dynamically substituted token for the + * maximum allowed value; i.e (nmaskbits - 1). Keep in mind that it is + * dynamic, so if system changes cause the bitmap width to change, such + * as more cores in a CPU list, then any ranges using N will also change. + * + * Returns: 0 on success, -errno on invalid input strings. Error values: + * + * - ``-EINVAL``: wrong region format + * - ``-EINVAL``: invalid character in string + * - ``-ERANGE``: bit number specified too large for mask + * - ``-EOVERFLOW``: integer overflow in the input parameters + */ +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits) +{ + struct region r; + long ret; + + r.nbits = nmaskbits; + bitmap_zero(maskp, r.nbits); + + while (buf) { + buf = bitmap_find_region(buf); + if (buf == NULL) + return 0; + + buf = bitmap_parse_region(buf, &r); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + ret = bitmap_check_region(&r); + if (ret) + return ret; + + bitmap_set_region(&r, maskp); + } + + return 0; +} +EXPORT_SYMBOL(bitmap_parselist); + + +/** + * bitmap_parselist_user() - convert user buffer's list format ASCII + * string to bitmap + * + * @ubuf: pointer to user buffer containing string. + * @ulen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Wrapper for bitmap_parselist(), providing it with user buffer. 
+ */ +int bitmap_parselist_user(const char __user *ubuf, + unsigned int ulen, unsigned long *maskp, + int nmaskbits) +{ + char *buf; + int ret; + + buf = memdup_user_nul(ubuf, ulen); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + ret = bitmap_parselist(buf, maskp, nmaskbits); + + kfree(buf); + return ret; +} +EXPORT_SYMBOL(bitmap_parselist_user); + +static const char *bitmap_get_x32_reverse(const char *start, + const char *end, u32 *num) +{ + u32 ret = 0; + int c, i; + + for (i = 0; i < 32; i += 4) { + c = hex_to_bin(*end--); + if (c < 0) + return ERR_PTR(-EINVAL); + + ret |= c << i; + + if (start > end || __end_of_region(*end)) + goto out; + } + + if (hex_to_bin(*end--) >= 0) + return ERR_PTR(-EOVERFLOW); +out: + *num = ret; + return end; +} + +/** + * bitmap_parse - convert an ASCII hex string into a bitmap. + * @start: pointer to buffer containing string. + * @buflen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0 or \n. In that case, + * UINT_MAX may be provided instead of string length. + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Commas group hex digits into chunks. Each chunk defines exactly 32 + * bits of the resultant bitmask. No chunk may specify a value larger + * than 32 bits (%-EOVERFLOW), and if a chunk specifies a smaller value + * then leading 0-bits are prepended. %-EINVAL is returned for illegal + * characters. Grouping such as "1,,5", ",44", "," or "" is allowed. + * Leading, embedded and trailing whitespace accepted. 
+ */ +int bitmap_parse(const char *start, unsigned int buflen, + unsigned long *maskp, int nmaskbits) +{ + const char *end = strnchrnul(start, buflen, '\n') - 1; + int chunks = BITS_TO_U32(nmaskbits); + u32 *bitmap = (u32 *)maskp; + int unset_bit; + int chunk; + + for (chunk = 0; ; chunk++) { + end = bitmap_find_region_reverse(start, end); + if (start > end) + break; + + if (!chunks--) + return -EOVERFLOW; + +#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) + end = bitmap_get_x32_reverse(start, end, &bitmap[chunk ^ 1]); +#else + end = bitmap_get_x32_reverse(start, end, &bitmap[chunk]); +#endif + if (IS_ERR(end)) + return PTR_ERR(end); + } + + unset_bit = (BITS_TO_U32(nmaskbits) - chunks) * 32; + if (unset_bit < nmaskbits) { + bitmap_clear(maskp, unset_bit, nmaskbits - unset_bit); + return 0; + } + + if (find_next_bit(maskp, unset_bit, nmaskbits) != unset_bit) + return -EOVERFLOW; + + return 0; +} +EXPORT_SYMBOL(bitmap_parse); diff --git a/lib/bitmap.c b/lib/bitmap.c index 935e0f96e785..abc5579768e9 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -6,21 +6,11 @@ #include #include -#include #include #include #include #include -#include -#include #include -#include -#include -#include - -#include - -#include "kstrtox.h" /** * DOC: bitmap introduction @@ -440,508 +430,6 @@ again: } EXPORT_SYMBOL(bitmap_find_next_zero_area_off); -/* - * Bitmap printing & parsing functions: first version by Nadia Yvette Chambers, - * second version by Paul Jackson, third by Joe Korty. - */ - -/** - * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap - * - * @ubuf: pointer to user buffer containing string. - * @ulen: buffer size in bytes. If string is smaller than this - * then it must be terminated with a \0. - * @maskp: pointer to bitmap array that will contain result. - * @nmaskbits: size of bitmap, in bits. 
- */ -int bitmap_parse_user(const char __user *ubuf, - unsigned int ulen, unsigned long *maskp, - int nmaskbits) -{ - char *buf; - int ret; - - buf = memdup_user_nul(ubuf, ulen); - if (IS_ERR(buf)) - return PTR_ERR(buf); - - ret = bitmap_parse(buf, UINT_MAX, maskp, nmaskbits); - - kfree(buf); - return ret; -} -EXPORT_SYMBOL(bitmap_parse_user); - -/** - * bitmap_print_to_pagebuf - convert bitmap to list or hex format ASCII string - * @list: indicates whether the bitmap must be list - * @buf: page aligned buffer into which string is placed - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * - * Output format is a comma-separated list of decimal numbers and - * ranges if list is specified or hex digits grouped into comma-separated - * sets of 8 digits/set. Returns the number of characters written to buf. - * - * It is assumed that @buf is a pointer into a PAGE_SIZE, page-aligned - * area and that sufficient storage remains at @buf to accommodate the - * bitmap_print_to_pagebuf() output. Returns the number of characters - * actually printed to @buf, excluding terminating '\0'. - */ -int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, - int nmaskbits) -{ - ptrdiff_t len = PAGE_SIZE - offset_in_page(buf); - - return list ? 
scnprintf(buf, len, "%*pbl\n", nmaskbits, maskp) : - scnprintf(buf, len, "%*pb\n", nmaskbits, maskp); -} -EXPORT_SYMBOL(bitmap_print_to_pagebuf); - -/** - * bitmap_print_to_buf - convert bitmap to list or hex format ASCII string - * @list: indicates whether the bitmap must be list - * true: print in decimal list format - * false: print in hexadecimal bitmask format - * @buf: buffer into which string is placed - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * @off: in the string from which we are copying, We copy to @buf - * @count: the maximum number of bytes to print - */ -static int bitmap_print_to_buf(bool list, char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count) -{ - const char *fmt = list ? "%*pbl\n" : "%*pb\n"; - ssize_t size; - void *data; - - data = kasprintf(GFP_KERNEL, fmt, nmaskbits, maskp); - if (!data) - return -ENOMEM; - - size = memory_read_from_buffer(buf, count, &off, data, strlen(data) + 1); - kfree(data); - - return size; -} - -/** - * bitmap_print_bitmask_to_buf - convert bitmap to hex bitmask format ASCII string - * @buf: buffer into which string is placed - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * @off: in the string from which we are copying, We copy to @buf - * @count: the maximum number of bytes to print - * - * The bitmap_print_to_pagebuf() is used indirectly via its cpumap wrapper - * cpumap_print_to_pagebuf() or directly by drivers to export hexadecimal - * bitmask and decimal list to userspace by sysfs ABI. - * Drivers might be using a normal attribute for this kind of ABIs. A - * normal attribute typically has show entry as below:: - * - * static ssize_t example_attribute_show(struct device *dev, - * struct device_attribute *attr, char *buf) - * { - * ... 
- * return bitmap_print_to_pagebuf(true, buf, &mask, nr_trig_max); - * } - * - * show entry of attribute has no offset and count parameters and this - * means the file is limited to one page only. - * bitmap_print_to_pagebuf() API works terribly well for this kind of - * normal attribute with buf parameter and without offset, count:: - * - * bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, - * int nmaskbits) - * { - * } - * - * The problem is once we have a large bitmap, we have a chance to get a - * bitmask or list more than one page. Especially for list, it could be - * as complex as 0,3,5,7,9,... We have no simple way to know it exact size. - * It turns out bin_attribute is a way to break this limit. bin_attribute - * has show entry as below:: - * - * static ssize_t - * example_bin_attribute_show(struct file *filp, struct kobject *kobj, - * struct bin_attribute *attr, char *buf, - * loff_t offset, size_t count) - * { - * ... - * } - * - * With the new offset and count parameters, this makes sysfs ABI be able - * to support file size more than one page. For example, offset could be - * >= 4096. - * bitmap_print_bitmask_to_buf(), bitmap_print_list_to_buf() wit their - * cpumap wrapper cpumap_print_bitmask_to_buf(), cpumap_print_list_to_buf() - * make those drivers be able to support large bitmask and list after they - * move to use bin_attribute. In result, we have to pass the corresponding - * parameters such as off, count from bin_attribute show entry to this API. - * - * The role of cpumap_print_bitmask_to_buf() and cpumap_print_list_to_buf() - * is similar with cpumap_print_to_pagebuf(), the difference is that - * bitmap_print_to_pagebuf() mainly serves sysfs attribute with the assumption - * the destination buffer is exactly one page and won't be more than one page. 
- * cpumap_print_bitmask_to_buf() and cpumap_print_list_to_buf(), on the other - * hand, mainly serves bin_attribute which doesn't work with exact one page, - * and it can break the size limit of converted decimal list and hexadecimal - * bitmask. - * - * WARNING! - * - * This function is not a replacement for sprintf() or bitmap_print_to_pagebuf(). - * It is intended to workaround sysfs limitations discussed above and should be - * used carefully in general case for the following reasons: - * - * - Time complexity is O(nbits^2/count), comparing to O(nbits) for snprintf(). - * - Memory complexity is O(nbits), comparing to O(1) for snprintf(). - * - @off and @count are NOT offset and number of bits to print. - * - If printing part of bitmap as list, the resulting string is not a correct - * list representation of bitmap. Particularly, some bits within or out of - * related interval may be erroneously set or unset. The format of the string - * may be broken, so bitmap_parselist-like parser may fail parsing it. - * - If printing the whole bitmap as list by parts, user must ensure the order - * of calls of the function such that the offset is incremented linearly. - * - If printing the whole bitmap as list by parts, user must keep bitmap - * unchanged between the very first and very last call. Otherwise concatenated - * result may be incorrect, and format may be broken. 
- * - * Returns the number of characters actually printed to @buf - */ -int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count) -{ - return bitmap_print_to_buf(false, buf, maskp, nmaskbits, off, count); -} -EXPORT_SYMBOL(bitmap_print_bitmask_to_buf); - -/** - * bitmap_print_list_to_buf - convert bitmap to decimal list format ASCII string - * @buf: buffer into which string is placed - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * @off: in the string from which we are copying, We copy to @buf - * @count: the maximum number of bytes to print - * - * Everything is same with the above bitmap_print_bitmask_to_buf() except - * the print format. - */ -int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count) -{ - return bitmap_print_to_buf(true, buf, maskp, nmaskbits, off, count); -} -EXPORT_SYMBOL(bitmap_print_list_to_buf); - -/* - * Region 9-38:4/10 describes the following bitmap structure: - * 0 9 12 18 38 N - * .........****......****......****.................. 
- * ^ ^ ^ ^ ^ - * start off group_len end nbits - */ -struct region { - unsigned int start; - unsigned int off; - unsigned int group_len; - unsigned int end; - unsigned int nbits; -}; - -static void bitmap_set_region(const struct region *r, unsigned long *bitmap) -{ - unsigned int start; - - for (start = r->start; start <= r->end; start += r->group_len) - bitmap_set(bitmap, start, min(r->end - start + 1, r->off)); -} - -static int bitmap_check_region(const struct region *r) -{ - if (r->start > r->end || r->group_len == 0 || r->off > r->group_len) - return -EINVAL; - - if (r->end >= r->nbits) - return -ERANGE; - - return 0; -} - -static const char *bitmap_getnum(const char *str, unsigned int *num, - unsigned int lastbit) -{ - unsigned long long n; - unsigned int len; - - if (str[0] == 'N') { - *num = lastbit; - return str + 1; - } - - len = _parse_integer(str, 10, &n); - if (!len) - return ERR_PTR(-EINVAL); - if (len & KSTRTOX_OVERFLOW || n != (unsigned int)n) - return ERR_PTR(-EOVERFLOW); - - *num = n; - return str + len; -} - -static inline bool end_of_str(char c) -{ - return c == '\0' || c == '\n'; -} - -static inline bool __end_of_region(char c) -{ - return isspace(c) || c == ','; -} - -static inline bool end_of_region(char c) -{ - return __end_of_region(c) || end_of_str(c); -} - -/* - * The format allows commas and whitespaces at the beginning - * of the region. - */ -static const char *bitmap_find_region(const char *str) -{ - while (__end_of_region(*str)) - str++; - - return end_of_str(*str) ? 
NULL : str; -} - -static const char *bitmap_find_region_reverse(const char *start, const char *end) -{ - while (start <= end && __end_of_region(*end)) - end--; - - return end; -} - -static const char *bitmap_parse_region(const char *str, struct region *r) -{ - unsigned int lastbit = r->nbits - 1; - - if (!strncasecmp(str, "all", 3)) { - r->start = 0; - r->end = lastbit; - str += 3; - - goto check_pattern; - } - - str = bitmap_getnum(str, &r->start, lastbit); - if (IS_ERR(str)) - return str; - - if (end_of_region(*str)) - goto no_end; - - if (*str != '-') - return ERR_PTR(-EINVAL); - - str = bitmap_getnum(str + 1, &r->end, lastbit); - if (IS_ERR(str)) - return str; - -check_pattern: - if (end_of_region(*str)) - goto no_pattern; - - if (*str != ':') - return ERR_PTR(-EINVAL); - - str = bitmap_getnum(str + 1, &r->off, lastbit); - if (IS_ERR(str)) - return str; - - if (*str != '/') - return ERR_PTR(-EINVAL); - - return bitmap_getnum(str + 1, &r->group_len, lastbit); - -no_end: - r->end = r->start; -no_pattern: - r->off = r->end + 1; - r->group_len = r->end + 1; - - return end_of_str(*str) ? NULL : str; -} - -/** - * bitmap_parselist - convert list format ASCII string to bitmap - * @buf: read user string from this buffer; must be terminated - * with a \0 or \n. - * @maskp: write resulting mask here - * @nmaskbits: number of bits in mask to be written - * - * Input format is a comma-separated list of decimal numbers and - * ranges. Consecutively set bits are shown as two hyphen-separated - * decimal numbers, the smallest and largest bit numbers set in - * the range. - * Optionally each range can be postfixed to denote that only parts of it - * should be set. The range will divided to groups of specific size. - * From each group will be used only defined amount of bits. 
- * Syntax: range:used_size/group_size - * Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769 - * The value 'N' can be used as a dynamically substituted token for the - * maximum allowed value; i.e (nmaskbits - 1). Keep in mind that it is - * dynamic, so if system changes cause the bitmap width to change, such - * as more cores in a CPU list, then any ranges using N will also change. - * - * Returns: 0 on success, -errno on invalid input strings. Error values: - * - * - ``-EINVAL``: wrong region format - * - ``-EINVAL``: invalid character in string - * - ``-ERANGE``: bit number specified too large for mask - * - ``-EOVERFLOW``: integer overflow in the input parameters - */ -int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits) -{ - struct region r; - long ret; - - r.nbits = nmaskbits; - bitmap_zero(maskp, r.nbits); - - while (buf) { - buf = bitmap_find_region(buf); - if (buf == NULL) - return 0; - - buf = bitmap_parse_region(buf, &r); - if (IS_ERR(buf)) - return PTR_ERR(buf); - - ret = bitmap_check_region(&r); - if (ret) - return ret; - - bitmap_set_region(&r, maskp); - } - - return 0; -} -EXPORT_SYMBOL(bitmap_parselist); - - -/** - * bitmap_parselist_user() - convert user buffer's list format ASCII - * string to bitmap - * - * @ubuf: pointer to user buffer containing string. - * @ulen: buffer size in bytes. If string is smaller than this - * then it must be terminated with a \0. - * @maskp: pointer to bitmap array that will contain result. - * @nmaskbits: size of bitmap, in bits. - * - * Wrapper for bitmap_parselist(), providing it with user buffer. 
- */ -int bitmap_parselist_user(const char __user *ubuf, - unsigned int ulen, unsigned long *maskp, - int nmaskbits) -{ - char *buf; - int ret; - - buf = memdup_user_nul(ubuf, ulen); - if (IS_ERR(buf)) - return PTR_ERR(buf); - - ret = bitmap_parselist(buf, maskp, nmaskbits); - - kfree(buf); - return ret; -} -EXPORT_SYMBOL(bitmap_parselist_user); - -static const char *bitmap_get_x32_reverse(const char *start, - const char *end, u32 *num) -{ - u32 ret = 0; - int c, i; - - for (i = 0; i < 32; i += 4) { - c = hex_to_bin(*end--); - if (c < 0) - return ERR_PTR(-EINVAL); - - ret |= c << i; - - if (start > end || __end_of_region(*end)) - goto out; - } - - if (hex_to_bin(*end--) >= 0) - return ERR_PTR(-EOVERFLOW); -out: - *num = ret; - return end; -} - -/** - * bitmap_parse - convert an ASCII hex string into a bitmap. - * @start: pointer to buffer containing string. - * @buflen: buffer size in bytes. If string is smaller than this - * then it must be terminated with a \0 or \n. In that case, - * UINT_MAX may be provided instead of string length. - * @maskp: pointer to bitmap array that will contain result. - * @nmaskbits: size of bitmap, in bits. - * - * Commas group hex digits into chunks. Each chunk defines exactly 32 - * bits of the resultant bitmask. No chunk may specify a value larger - * than 32 bits (%-EOVERFLOW), and if a chunk specifies a smaller value - * then leading 0-bits are prepended. %-EINVAL is returned for illegal - * characters. Grouping such as "1,,5", ",44", "," or "" is allowed. - * Leading, embedded and trailing whitespace accepted. 
- */ -int bitmap_parse(const char *start, unsigned int buflen, - unsigned long *maskp, int nmaskbits) -{ - const char *end = strnchrnul(start, buflen, '\n') - 1; - int chunks = BITS_TO_U32(nmaskbits); - u32 *bitmap = (u32 *)maskp; - int unset_bit; - int chunk; - - for (chunk = 0; ; chunk++) { - end = bitmap_find_region_reverse(start, end); - if (start > end) - break; - - if (!chunks--) - return -EOVERFLOW; - -#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) - end = bitmap_get_x32_reverse(start, end, &bitmap[chunk ^ 1]); -#else - end = bitmap_get_x32_reverse(start, end, &bitmap[chunk]); -#endif - if (IS_ERR(end)) - return PTR_ERR(end); - } - - unset_bit = (BITS_TO_U32(nmaskbits) - chunks) * 32; - if (unset_bit < nmaskbits) { - bitmap_clear(maskp, unset_bit, nmaskbits - unset_bit); - return 0; - } - - if (find_next_bit(maskp, unset_bit, nmaskbits) != unset_bit) - return -EOVERFLOW; - - return 0; -} -EXPORT_SYMBOL(bitmap_parse); - /** * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap -- cgit v1.2.3 From 49dbe25adac42d3e06f65d1420946bec65896222 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:14 +0300 Subject: vsock: read from socket's error queue This adds handling of MSG_ERRQUEUE input flag in receive call. This flag is used to read socket's error queue instead of data queue. Possible scenario of error queue usage is receiving completions for transmission with MSG_ZEROCOPY flag. This patch also adds new defines: 'SOL_VSOCK' and 'VSOCK_RECVERR'. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. 
Miller --- include/linux/socket.h | 1 + include/uapi/linux/vm_sockets.h | 17 +++++++++++++++++ net/vmw_vsock/af_vsock.c | 6 ++++++ 3 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 39b74d83c7c4..cfcb7e2c3813 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index c60ca33eac59..ed07181d4eff 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -191,4 +191,21 @@ struct sockaddr_vm { #define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) +/* MSG_ZEROCOPY notifications are encoded in the standard error format, + * sock_extended_err. See Documentation/networking/msg_zerocopy.rst in + * kernel source tree for more details. + */ + +/* 'cmsg_level' field value of 'struct cmsghdr' for notification parsing + * when MSG_ZEROCOPY flag is used on transmissions. + */ + +#define SOL_VSOCK 287 + +/* 'cmsg_type' field value of 'struct cmsghdr' for notification parsing + * when MSG_ZEROCOPY flag is used on transmissions. 
+ */ + +#define VSOCK_RECVERR 1 + #endif /* _UAPI_VM_SOCKETS_H */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d841f4de33b0..38486efd3d05 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,7 @@ #include #include #include +#include static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); static void vsock_sk_destruct(struct sock *sk); @@ -2137,6 +2139,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int err; sk = sock->sk; + + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, VSOCK_RECVERR); + vsk = vsock_sk(sk); err = 0; -- cgit v1.2.3 From d7fbc0b7e846e9e0e70ae766d274b8720fbab412 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Wed, 11 Oct 2023 12:12:34 +0200 Subject: dpll: netlink/core: add support for pin-dpll signal phase offset/adjust Add callback ops for pin-dpll phase measurement. Add callback for pin signal phase adjustment. Add min and max phase adjustment values to pin properties. Invoke callbacks in dpll_netlink.c when filling the pin details to provide user with phase related attribute values. Signed-off-by: Arkadiusz Kubalewski Signed-off-by: David S.
Miller --- drivers/dpll/dpll_netlink.c | 138 +++++++++++++++++++++++++++++++++++++++++++- include/linux/dpll.h | 18 ++++++ 2 files changed, 155 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index e20daba6896a..09a6c2a1ea92 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -212,6 +212,53 @@ dpll_msg_add_pin_direction(struct sk_buff *msg, struct dpll_pin *pin, return 0; } +static int +dpll_msg_add_pin_phase_adjust(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + s32 phase_adjust; + int ret; + + if (!ops->phase_adjust_get) + return 0; + ret = ops->phase_adjust_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), + &phase_adjust, extack); + if (ret) + return ret; + if (nla_put_s32(msg, DPLL_A_PIN_PHASE_ADJUST, phase_adjust)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_phase_offset(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + s64 phase_offset; + int ret; + + if (!ops->phase_offset_get) + return 0; + ret = ops->phase_offset_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), &phase_offset, + extack); + if (ret) + return ret; + if (nla_put_64bit(msg, DPLL_A_PIN_PHASE_OFFSET, sizeof(phase_offset), + &phase_offset, DPLL_A_PIN_PAD)) + return -EMSGSIZE; + + return 0; +} + static int dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin, struct dpll_pin_ref *ref, struct netlink_ext_ack *extack) @@ -330,6 +377,9 @@ dpll_msg_add_pin_dplls(struct sk_buff *msg, struct dpll_pin *pin, if (ret) goto nest_cancel; ret = dpll_msg_add_pin_direction(msg, pin, ref, extack); + if (ret) + goto nest_cancel; + ret = 
dpll_msg_add_phase_offset(msg, pin, ref, extack); if (ret) goto nest_cancel; nla_nest_end(msg, attr); @@ -377,6 +427,15 @@ dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin, if (nla_put_u32(msg, DPLL_A_PIN_CAPABILITIES, prop->capabilities)) return -EMSGSIZE; ret = dpll_msg_add_pin_freq(msg, pin, ref, extack); + if (ret) + return ret; + if (nla_put_s32(msg, DPLL_A_PIN_PHASE_ADJUST_MIN, + prop->phase_range.min)) + return -EMSGSIZE; + if (nla_put_s32(msg, DPLL_A_PIN_PHASE_ADJUST_MAX, + prop->phase_range.max)) + return -EMSGSIZE; + ret = dpll_msg_add_pin_phase_adjust(msg, pin, ref, extack); if (ret) return ret; if (xa_empty(&pin->parent_refs)) @@ -416,7 +475,7 @@ dpll_device_get_one(struct dpll_device *dpll, struct sk_buff *msg, if (nla_put_u32(msg, DPLL_A_TYPE, dpll->type)) return -EMSGSIZE; - return ret; + return 0; } static int @@ -705,6 +764,78 @@ dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll, return 0; } +static int +dpll_pin_phase_adj_set(struct dpll_pin *pin, struct nlattr *phase_adj_attr, + struct netlink_ext_ack *extack) +{ + struct dpll_pin_ref *ref, *failed; + const struct dpll_pin_ops *ops; + s32 phase_adj, old_phase_adj; + struct dpll_device *dpll; + unsigned long i; + int ret; + + phase_adj = nla_get_s32(phase_adj_attr); + if (phase_adj > pin->prop->phase_range.max || + phase_adj < pin->prop->phase_range.min) { + NL_SET_ERR_MSG_ATTR(extack, phase_adj_attr, + "phase adjust value not supported"); + return -EINVAL; + } + + xa_for_each(&pin->dpll_refs, i, ref) { + ops = dpll_pin_ops(ref); + if (!ops->phase_adjust_set || !ops->phase_adjust_get) { + NL_SET_ERR_MSG(extack, "phase adjust not supported"); + return -EOPNOTSUPP; + } + } + ref = dpll_xa_ref_dpll_first(&pin->dpll_refs); + ops = dpll_pin_ops(ref); + dpll = ref->dpll; + ret = ops->phase_adjust_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), &old_phase_adj, + extack); + if (ret) { + NL_SET_ERR_MSG(extack, "unable to get old phase adjust value"); + 
return ret; + } + if (phase_adj == old_phase_adj) + return 0; + + xa_for_each(&pin->dpll_refs, i, ref) { + ops = dpll_pin_ops(ref); + dpll = ref->dpll; + ret = ops->phase_adjust_set(pin, + dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), phase_adj, + extack); + if (ret) { + failed = ref; + NL_SET_ERR_MSG_FMT(extack, + "phase adjust set failed for dpll_id:%u", + dpll->id); + goto rollback; + } + } + __dpll_pin_change_ntf(pin); + + return 0; + +rollback: + xa_for_each(&pin->dpll_refs, i, ref) { + if (ref == failed) + break; + ops = dpll_pin_ops(ref); + dpll = ref->dpll; + if (ops->phase_adjust_set(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), old_phase_adj, + extack)) + NL_SET_ERR_MSG(extack, "set phase adjust rollback failed"); + } + return ret; +} + static int dpll_pin_parent_device_set(struct dpll_pin *pin, struct nlattr *parent_nest, struct netlink_ext_ack *extack) @@ -793,6 +924,11 @@ dpll_pin_set_from_nlattr(struct dpll_pin *pin, struct genl_info *info) if (ret) return ret; break; + case DPLL_A_PIN_PHASE_ADJUST: + ret = dpll_pin_phase_adj_set(pin, a, info->extack); + if (ret) + return ret; + break; case DPLL_A_PIN_PARENT_DEVICE: ret = dpll_pin_parent_device_set(pin, a, info->extack); if (ret) diff --git a/include/linux/dpll.h b/include/linux/dpll.h index bbc480cd2932..578fc5fa3750 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -68,6 +68,18 @@ struct dpll_pin_ops { int (*prio_set)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, const u32 prio, struct netlink_ext_ack *extack); + int (*phase_offset_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *phase_offset, + struct netlink_ext_ack *extack); + int (*phase_adjust_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s32 *phase_adjust, + struct netlink_ext_ack *extack); + int (*phase_adjust_set)(const struct dpll_pin 
*pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const s32 phase_adjust, + struct netlink_ext_ack *extack); }; struct dpll_pin_frequency { @@ -91,6 +103,11 @@ struct dpll_pin_frequency { #define DPLL_PIN_FREQUENCY_DCF77 \ DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ) +struct dpll_pin_phase_adjust_range { + s32 min; + s32 max; +}; + struct dpll_pin_properties { const char *board_label; const char *panel_label; @@ -99,6 +116,7 @@ struct dpll_pin_properties { unsigned long capabilities; u32 freq_supported_num; struct dpll_pin_frequency *freq_supported; + struct dpll_pin_phase_adjust_range phase_range; }; #if IS_ENABLED(CONFIG_DPLL) -- cgit v1.2.3 From fc8b2a619469378717e7270d2a4e1ef93c585f7a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 11 Oct 2023 10:01:14 -0400 Subject: net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation Syzbot reported two new paths to hit an internal WARNING using the new virtio gso type VIRTIO_NET_HDR_GSO_UDP_L4. RIP: 0010:skb_checksum_help+0x4a2/0x600 net/core/dev.c:3260 skb len=64521 gso_size=344 and RIP: 0010:skb_warn_bad_offload+0x118/0x240 net/core/dev.c:3262 Older virtio types have historically had loose restrictions, leading to many entirely impractical fuzzer generated packets causing problems deep in the kernel stack. Ideally, we would have had strict validation for all types from the start. New virtio types can have tighter validation. Limit UDP GSO packets inserted via virtio to the same limits imposed by the UDP_SEGMENT socket interface: 1. must use checksum offload 2. checksum offload matches UDP header 3. no more segments than UDP_MAX_SEGMENTS 4. 
UDP GSO does not take modifier flags, notably SKB_GSO_TCP_ECN Fixes: 860b7f27b8f7 ("linux/virtio_net.h: Support USO offload in vnet header.") Reported-by: syzbot+01cdbc31e9c0ae9b33ac@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005039270605eb0b7f@google.com/ Reported-by: syzbot+c99d835ff081ca30f986@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005426680605eb0b9f@google.com/ Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 7b4dd69555e4..27cc1d464321 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,8 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include +#include #include -#include #include static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) @@ -151,9 +151,22 @@ retry: unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); - /* UFO may not include transport header in gso_size. */ - if (gso_type & SKB_GSO_UDP) + switch (gso_type & ~SKB_GSO_TCP_ECN) { + case SKB_GSO_UDP: + /* UFO may not include transport header in gso_size. */ nh_off -= thlen; + break; + case SKB_GSO_UDP_L4: + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return -EINVAL; + if (skb->csum_offset != offsetof(struct udphdr, check)) + return -EINVAL; + if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (gso_type != SKB_GSO_UDP_L4) + return -EINVAL; + break; + } /* Kernel has a special handling for GSO_BY_FRAGS. 
*/ if (gso_size == GSO_BY_FRAGS) -- cgit v1.2.3 From 60c6946675fc06dd2fd2b7a4b6fd1c1f046f1056 Mon Sep 17 00:00:00 2001 From: Xabier Marquiegui Date: Thu, 12 Oct 2023 00:39:53 +0200 Subject: posix-clock: introduce posix_clock_context concept Add the necessary structure to support custom private-data per posix-clock user. The previous implementation of posix-clock assumed all file open instances need access to the same clock structure on private_data. The need for individual data structures per file open instance has been identified when developing support for multiple timestamp event queue users for ptp_clock. Signed-off-by: Xabier Marquiegui Suggested-by: Richard Cochran Suggested-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 21 +++++++++++++-------- drivers/ptp/ptp_private.h | 16 +++++++++------- include/linux/posix-clock.h | 35 +++++++++++++++++++++++++++-------- kernel/time/posix-clock.c | 36 +++++++++++++++++++++++++++--------- 4 files changed, 76 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 362bf756e6b7..0ba3e7064df2 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -101,14 +101,16 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, return 0; } -int ptp_open(struct posix_clock *pc, fmode_t fmode) +int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode) { return 0; } -long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) +long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock *ptp = + container_of(pccontext->clk, struct ptp_clock, clock); struct ptp_sys_offset_extended *extoff = NULL; struct ptp_sys_offset_precise precise_offset; struct system_device_crosststamp xtstamp; @@ -432,9 +434,11 @@ out: return err; } -__poll_t ptp_poll(struct 
posix_clock *pc, struct file *fp, poll_table *wait) +__poll_t ptp_poll(struct posix_clock_context *pccontext, struct file *fp, + poll_table *wait) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock *ptp = + container_of(pccontext->clk, struct ptp_clock, clock); poll_wait(fp, &ptp->tsev_wq, wait); @@ -443,10 +447,11 @@ __poll_t ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait) #define EXTTS_BUFSIZE (PTP_BUF_TIMESTAMPS * sizeof(struct ptp_extts_event)) -ssize_t ptp_read(struct posix_clock *pc, - uint rdflags, char __user *buf, size_t cnt) +ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags, + char __user *buf, size_t cnt) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock *ptp = + container_of(pccontext->clk, struct ptp_clock, clock); struct timestamp_event_queue *queue = &ptp->tsevq; struct ptp_extts_event *event; unsigned long flags; diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 75f58fc468a7..a3110c85f694 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -117,16 +117,18 @@ extern struct class *ptp_class; int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, enum ptp_pin_function func, unsigned int chan); -long ptp_ioctl(struct posix_clock *pc, - unsigned int cmd, unsigned long arg); +long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg); -int ptp_open(struct posix_clock *pc, fmode_t fmode); +int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode); -ssize_t ptp_read(struct posix_clock *pc, - uint flags, char __user *buf, size_t cnt); +int ptp_release(struct posix_clock_context *pccontext); -__poll_t ptp_poll(struct posix_clock *pc, - struct file *fp, poll_table *wait); +ssize_t ptp_read(struct posix_clock_context *pccontext, uint flags, char __user *buf, + size_t cnt); + +__poll_t ptp_poll(struct posix_clock_context *pccontext, struct file *fp, + 
poll_table *wait); /* * see ptp_sysfs.c diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 468328b1e1dd..ef8619f48920 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -14,6 +14,7 @@ #include struct posix_clock; +struct posix_clock_context; /** * struct posix_clock_operations - functional interface to the clock @@ -50,18 +51,18 @@ struct posix_clock_operations { /* * Optional character device methods: */ - long (*ioctl) (struct posix_clock *pc, - unsigned int cmd, unsigned long arg); + long (*ioctl)(struct posix_clock_context *pccontext, unsigned int cmd, + unsigned long arg); - int (*open) (struct posix_clock *pc, fmode_t f_mode); + int (*open)(struct posix_clock_context *pccontext, fmode_t f_mode); - __poll_t (*poll) (struct posix_clock *pc, - struct file *file, poll_table *wait); + __poll_t (*poll)(struct posix_clock_context *pccontext, struct file *file, + poll_table *wait); - int (*release) (struct posix_clock *pc); + int (*release)(struct posix_clock_context *pccontext); - ssize_t (*read) (struct posix_clock *pc, - uint flags, char __user *buf, size_t cnt); + ssize_t (*read)(struct posix_clock_context *pccontext, uint flags, + char __user *buf, size_t cnt); }; /** @@ -90,6 +91,24 @@ struct posix_clock { bool zombie; }; +/** + * struct posix_clock_context - represents clock file operations context + * + * @clk: Pointer to the clock + * @private_clkdata: Pointer to user data + * + * Drivers should use struct posix_clock_context during specific character + * device file operation methods to access the posix clock. + * + * Drivers can store a private data structure during the open operation + * if they have specific information that is required in other file + * operations. + */ +struct posix_clock_context { + struct posix_clock *clk; + void *private_clkdata; +}; + /** * posix_clock_register() - register a new clock * @clk: Pointer to the clock. 
Caller must provide 'ops' field diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 77c0c2370b6d..9de66bbbb3d1 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -19,7 +19,8 @@ */ static struct posix_clock *get_posix_clock(struct file *fp) { - struct posix_clock *clk = fp->private_data; + struct posix_clock_context *pccontext = fp->private_data; + struct posix_clock *clk = pccontext->clk; down_read(&clk->rwsem); @@ -39,6 +40,7 @@ static void put_posix_clock(struct posix_clock *clk) static ssize_t posix_clock_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) { + struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); int err = -EINVAL; @@ -46,7 +48,7 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf, return -ENODEV; if (clk->ops.read) - err = clk->ops.read(clk, fp->f_flags, buf, count); + err = clk->ops.read(pccontext, fp->f_flags, buf, count); put_posix_clock(clk); @@ -55,6 +57,7 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf, static __poll_t posix_clock_poll(struct file *fp, poll_table *wait) { + struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); __poll_t result = 0; @@ -62,7 +65,7 @@ static __poll_t posix_clock_poll(struct file *fp, poll_table *wait) return EPOLLERR; if (clk->ops.poll) - result = clk->ops.poll(clk, fp, wait); + result = clk->ops.poll(pccontext, fp, wait); put_posix_clock(clk); @@ -72,6 +75,7 @@ static __poll_t posix_clock_poll(struct file *fp, poll_table *wait) static long posix_clock_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { + struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); int err = -ENOTTY; @@ -79,7 +83,7 @@ static long posix_clock_ioctl(struct file *fp, return -ENODEV; if (clk->ops.ioctl) - err = clk->ops.ioctl(clk, cmd, arg); + err = clk->ops.ioctl(pccontext, cmd, 
arg); put_posix_clock(clk); @@ -90,6 +94,7 @@ static long posix_clock_ioctl(struct file *fp, static long posix_clock_compat_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { + struct posix_clock_context *pccontext = fp->private_data; struct posix_clock *clk = get_posix_clock(fp); int err = -ENOTTY; @@ -97,7 +102,7 @@ static long posix_clock_compat_ioctl(struct file *fp, return -ENODEV; if (clk->ops.ioctl) - err = clk->ops.ioctl(clk, cmd, arg); + err = clk->ops.ioctl(pccontext, cmd, arg); put_posix_clock(clk); @@ -110,6 +115,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) int err; struct posix_clock *clk = container_of(inode->i_cdev, struct posix_clock, cdev); + struct posix_clock_context *pccontext; down_read(&clk->rwsem); @@ -117,14 +123,20 @@ static int posix_clock_open(struct inode *inode, struct file *fp) err = -ENODEV; goto out; } + pccontext = kzalloc(sizeof(*pccontext), GFP_KERNEL); + if (!pccontext) { + err = -ENOMEM; + goto out; + } + pccontext->clk = clk; + fp->private_data = pccontext; if (clk->ops.open) - err = clk->ops.open(clk, fp->f_mode); + err = clk->ops.open(pccontext, fp->f_mode); else err = 0; if (!err) { get_device(clk->dev); - fp->private_data = clk; } out: up_read(&clk->rwsem); @@ -133,14 +145,20 @@ out: static int posix_clock_release(struct inode *inode, struct file *fp) { - struct posix_clock *clk = fp->private_data; + struct posix_clock_context *pccontext = fp->private_data; + struct posix_clock *clk; int err = 0; + if (!pccontext) + return -ENODEV; + clk = pccontext->clk; + if (clk->ops.release) - err = clk->ops.release(clk); + err = clk->ops.release(pccontext); put_device(clk->dev); + kfree(pccontext); fp->private_data = NULL; return err; -- cgit v1.2.3 From 058b4d9de86b3f77cd23fbd43a0f5ee4ea8e0aeb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Oct 2023 11:01:21 +0300 Subject: iommu: change iommu_map_sgtable to return signed values The iommu_map_sgtable() function returns ssize_t and 
negative error codes but it's declared as size_t instead. I think that static checkers would have complained if this caused a bug, but even though it doesn't cause a bug, it's definitely worth fixing. Signed-off-by: Dan Carpenter Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/06672b96-23fd-424c-8880-1626e7bf119c@moroto.mountain Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 64bd20142cbe..c28178f3690a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1116,7 +1116,7 @@ static inline void iommu_free_global_pasid(ioasid_t pasid) {} * Creates a mapping at @iova for the buffer described by a scatterlist * stored in the given sg_table object in the provided IOMMU domain. */ -static inline size_t iommu_map_sgtable(struct iommu_domain *domain, +static inline ssize_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, -- cgit v1.2.3 From f6ca3fb6978f94d95ee79f95085fc22e71ca17cc Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Fri, 22 Sep 2023 16:17:16 +0200 Subject: mtd: rawnand: Ensure the nand chip supports cached reads Both the JEDEC and ONFI specification say that read cache sequential support is an optional command. This means that we not only need to check whether the individual controller supports the command, we also need to check the parameter pages for both ONFI and JEDEC NAND flashes before enabling sequential cache reads. This fixes support for NAND flashes which don't support enabling cache reads, i.e. Samsung K9F4G08U0F or Toshiba TC58NVG0S3HTA00. Sequential cache reads are now only available for ONFI and JEDEC devices, if individual vendors implement this, it needs to be enabled per vendor. 
Tested on i.MX6Q with a Samsung NAND flash chip that doesn't support sequential reads. Fixes: 003fe4b9545b ("mtd: rawnand: Support for sequential cache reads") Cc: stable@vger.kernel.org Signed-off-by: Rouven Czerwinski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230922141717.35977-1-r.czerwinski@pengutronix.de --- drivers/mtd/nand/raw/nand_base.c | 3 +++ drivers/mtd/nand/raw/nand_jedec.c | 3 +++ drivers/mtd/nand/raw/nand_onfi.c | 3 +++ include/linux/mtd/jedec.h | 3 +++ include/linux/mtd/onfi.h | 1 + include/linux/mtd/rawnand.h | 2 ++ 6 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index d4b55155aeae..1fcac403cee6 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5110,6 +5110,9 @@ static void rawnand_check_cont_read_support(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); + if (!chip->parameters.supports_read_cache) + return; + if (chip->read_retries) return; diff --git a/drivers/mtd/nand/raw/nand_jedec.c b/drivers/mtd/nand/raw/nand_jedec.c index 836757717660..b3cc8f360529 100644 --- a/drivers/mtd/nand/raw/nand_jedec.c +++ b/drivers/mtd/nand/raw/nand_jedec.c @@ -94,6 +94,9 @@ int nand_jedec_detect(struct nand_chip *chip) goto free_jedec_param_page; } + if (p->opt_cmd[0] & JEDEC_OPT_CMD_READ_CACHE) + chip->parameters.supports_read_cache = true; + memorg->pagesize = le32_to_cpu(p->byte_per_page); mtd->writesize = memorg->pagesize; diff --git a/drivers/mtd/nand/raw/nand_onfi.c b/drivers/mtd/nand/raw/nand_onfi.c index f15ef90aec8c..861975e44b55 100644 --- a/drivers/mtd/nand/raw/nand_onfi.c +++ b/drivers/mtd/nand/raw/nand_onfi.c @@ -303,6 +303,9 @@ int nand_onfi_detect(struct nand_chip *chip) ONFI_FEATURE_ADDR_TIMING_MODE, 1); } + if (le16_to_cpu(p->opt_cmd) & ONFI_OPT_CMD_READ_CACHE) + chip->parameters.supports_read_cache = true; + onfi = kzalloc(sizeof(*onfi), GFP_KERNEL); if (!onfi) { ret 
= -ENOMEM; diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h index 0b6b59f7cfbd..56047a4e54c9 100644 --- a/include/linux/mtd/jedec.h +++ b/include/linux/mtd/jedec.h @@ -21,6 +21,9 @@ struct jedec_ecc_info { /* JEDEC features */ #define JEDEC_FEATURE_16_BIT_BUS (1 << 0) +/* JEDEC Optional Commands */ +#define JEDEC_OPT_CMD_READ_CACHE BIT(1) + struct nand_jedec_params { /* rev info and features block */ /* 'J' 'E' 'S' 'D' */ diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h index a7376f9beddf..55ab2e4d62f9 100644 --- a/include/linux/mtd/onfi.h +++ b/include/linux/mtd/onfi.h @@ -55,6 +55,7 @@ #define ONFI_SUBFEATURE_PARAM_LEN 4 /* ONFI optional commands SET/GET FEATURES supported? */ +#define ONFI_OPT_CMD_READ_CACHE BIT(1) #define ONFI_OPT_CMD_SET_GET_FEATURES BIT(2) struct nand_onfi_params { diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 90a141ba2a5a..c29ace15a053 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -225,6 +225,7 @@ struct gpio_desc; * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name * @supports_set_get_features: The NAND chip supports setting/getting features + * @supports_read_cache: The NAND chip supports read cache operations * @set_feature_list: Bitmap of features that can be set * @get_feature_list: Bitmap of features that can be get * @onfi: ONFI specific parameters @@ -233,6 +234,7 @@ struct nand_parameters { /* Generic parameters */ const char *model; bool supports_set_get_features; + bool supports_read_cache; DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); -- cgit v1.2.3 From f447318fb1d156b4b6da79266724c7ee347d1b59 Mon Sep 17 00:00:00 2001 From: Martin Kurbanov Date: Mon, 2 Oct 2023 17:04:58 +0300 Subject: mtd: spinand: add support for FORESEE F35SQA002G Add support for FORESEE F35SQA002G SPI NAND. 
Datasheet: https://www.longsys.com/uploads/LM-00006FORESEEF35SQA002GDatasheet_1650183701.pdf Signed-off-by: Martin Kurbanov Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20231002140458.147605-1-mmkurbanov@salutedevices.com --- drivers/mtd/nand/spi/Makefile | 2 +- drivers/mtd/nand/spi/core.c | 1 + drivers/mtd/nand/spi/foresee.c | 95 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 1 + 4 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/spi/foresee.c (limited to 'include/linux') diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile index cd8b66bf7740..19cc77288ebb 100644 --- a/drivers/mtd/nand/spi/Makefile +++ b/drivers/mtd/nand/spi/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -spinand-objs := core.o alliancememory.o ato.o esmt.o gigadevice.o macronix.o +spinand-objs := core.o alliancememory.o ato.o esmt.o foresee.o gigadevice.o macronix.o spinand-objs += micron.o paragon.o toshiba.o winbond.o xtx.o obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 393ff37f0d23..849ccfedbc72 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -940,6 +940,7 @@ static const struct spinand_manufacturer *spinand_manufacturers[] = { &alliancememory_spinand_manufacturer, &ato_spinand_manufacturer, &esmt_c8_spinand_manufacturer, + &foresee_spinand_manufacturer, &gigadevice_spinand_manufacturer, &macronix_spinand_manufacturer, &micron_spinand_manufacturer, diff --git a/drivers/mtd/nand/spi/foresee.c b/drivers/mtd/nand/spi/foresee.c new file mode 100644 index 000000000000..e0d2d9257045 --- /dev/null +++ b/drivers/mtd/nand/spi/foresee.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2023, SberDevices. All Rights Reserved.
+ * + * Author: Martin Kurbanov + */ + +#include +#include +#include + +#define SPINAND_MFR_FORESEE 0xCD + +static SPINAND_OP_VARIANTS(read_cache_variants, + SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0)); + +static SPINAND_OP_VARIANTS(write_cache_variants, + SPINAND_PROG_LOAD_X4(true, 0, NULL, 0), + SPINAND_PROG_LOAD(true, 0, NULL, 0)); + +static SPINAND_OP_VARIANTS(update_cache_variants, + SPINAND_PROG_LOAD_X4(false, 0, NULL, 0), + SPINAND_PROG_LOAD(false, 0, NULL, 0)); + +static int f35sqa002g_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + return -ERANGE; +} + +static int f35sqa002g_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section) + return -ERANGE; + + /* Reserve 2 bytes for the BBM. */ + region->offset = 2; + region->length = 62; + + return 0; +} + +static const struct mtd_ooblayout_ops f35sqa002g_ooblayout = { + .ecc = f35sqa002g_ooblayout_ecc, + .free = f35sqa002g_ooblayout_free, +}; + +static int f35sqa002g_ecc_get_status(struct spinand_device *spinand, u8 status) +{ + struct nand_device *nand = spinand_to_nand(spinand); + + switch (status & STATUS_ECC_MASK) { + case STATUS_ECC_NO_BITFLIPS: + return 0; + + case STATUS_ECC_HAS_BITFLIPS: + return nanddev_get_ecc_conf(nand)->strength; + + default: + break; + } + + /* More than 1-bit error was detected in one or more sectors and + * cannot be corrected. 
+ */ + return -EBADMSG; +} + +static const struct spinand_info foresee_spinand_table[] = { + SPINAND_INFO("F35SQA002G", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x72, 0x72), + NAND_MEMORG(1, 2048, 64, 64, 2048, 40, 1, 1, 1), + NAND_ECCREQ(1, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_HAS_QE_BIT, + SPINAND_ECCINFO(&f35sqa002g_ooblayout, + f35sqa002g_ecc_get_status)), +}; + +static const struct spinand_manufacturer_ops foresee_spinand_manuf_ops = { +}; + +const struct spinand_manufacturer foresee_spinand_manufacturer = { + .id = SPINAND_MFR_FORESEE, + .name = "FORESEE", + .chips = foresee_spinand_table, + .nchips = ARRAY_SIZE(foresee_spinand_table), + .ops = &foresee_spinand_manuf_ops, +}; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 3e285c09d16d..badb4c1ac079 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -263,6 +263,7 @@ struct spinand_manufacturer { extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer; +extern const struct spinand_manufacturer foresee_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; -- cgit v1.2.3 From 0da28d5fc808dfcfbc910870b4b0277c1a7ccb6c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 15 Sep 2023 10:53:27 +0200 Subject: drm: renesas: shmobile: Remove backlight support Backlight support should be implemented by panels, not by the LCDC driver. As the feature is currently unused anyway, remove it. 
Signed-off-by: Laurent Pinchart [geert: Cleanups] Reviewed-by: Laurent Pinchart Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/69707650245bc2193d072f24723d4d5482ea590b.1694767209.git.geert+renesas@glider.be --- drivers/gpu/drm/renesas/shmobile/Makefile | 3 +- .../gpu/drm/renesas/shmobile/shmob_drm_backlight.c | 82 ---------------------- .../gpu/drm/renesas/shmobile/shmob_drm_backlight.h | 19 ----- drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c | 33 ++------- drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h | 8 --- drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h | 2 +- drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c | 2 +- include/linux/platform_data/shmob_drm.h | 8 --- 8 files changed, 7 insertions(+), 150 deletions(-) delete mode 100644 drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c delete mode 100644 drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h (limited to 'include/linux') diff --git a/drivers/gpu/drm/renesas/shmobile/Makefile b/drivers/gpu/drm/renesas/shmobile/Makefile index 861edafed856..2679555d61a7 100644 --- a/drivers/gpu/drm/renesas/shmobile/Makefile +++ b/drivers/gpu/drm/renesas/shmobile/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -shmob-drm-y := shmob_drm_backlight.o \ - shmob_drm_crtc.o \ +shmob-drm-y := shmob_drm_crtc.o \ shmob_drm_drv.o \ shmob_drm_kms.o \ shmob_drm_plane.o diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c deleted file mode 100644 index 794573badfe8..000000000000 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.c +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * shmob_drm_backlight.c -- SH Mobile DRM Backlight - * - * Copyright (C) 2012 Renesas Electronics Corporation - * - * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - */ - -#include - -#include "shmob_drm_backlight.h" -#include "shmob_drm_crtc.h" -#include "shmob_drm_drv.h" - -static int 
shmob_drm_backlight_update(struct backlight_device *bdev) -{ - struct shmob_drm_connector *scon = bl_get_data(bdev); - struct shmob_drm_device *sdev = scon->connector.dev->dev_private; - const struct shmob_drm_backlight_data *bdata = &sdev->pdata->backlight; - int brightness = backlight_get_brightness(bdev); - - return bdata->set_brightness(brightness); -} - -static int shmob_drm_backlight_get_brightness(struct backlight_device *bdev) -{ - struct shmob_drm_connector *scon = bl_get_data(bdev); - struct shmob_drm_device *sdev = scon->connector.dev->dev_private; - const struct shmob_drm_backlight_data *bdata = &sdev->pdata->backlight; - - return bdata->get_brightness(); -} - -static const struct backlight_ops shmob_drm_backlight_ops = { - .options = BL_CORE_SUSPENDRESUME, - .update_status = shmob_drm_backlight_update, - .get_brightness = shmob_drm_backlight_get_brightness, -}; - -void shmob_drm_backlight_dpms(struct shmob_drm_connector *scon, int mode) -{ - if (scon->backlight == NULL) - return; - - scon->backlight->props.power = mode == DRM_MODE_DPMS_ON - ? 
FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN; - backlight_update_status(scon->backlight); -} - -int shmob_drm_backlight_init(struct shmob_drm_connector *scon) -{ - struct shmob_drm_device *sdev = scon->connector.dev->dev_private; - const struct shmob_drm_backlight_data *bdata = &sdev->pdata->backlight; - struct drm_connector *connector = &scon->connector; - struct drm_device *dev = connector->dev; - struct backlight_device *backlight; - - if (!bdata->max_brightness) - return 0; - - backlight = backlight_device_register(bdata->name, dev->dev, scon, - &shmob_drm_backlight_ops, NULL); - if (IS_ERR(backlight)) { - dev_err(dev->dev, "unable to register backlight device: %ld\n", - PTR_ERR(backlight)); - return PTR_ERR(backlight); - } - - backlight->props.max_brightness = bdata->max_brightness; - backlight->props.brightness = bdata->max_brightness; - backlight->props.power = FB_BLANK_POWERDOWN; - backlight_update_status(backlight); - - scon->backlight = backlight; - return 0; -} - -void shmob_drm_backlight_exit(struct shmob_drm_connector *scon) -{ - backlight_device_unregister(scon->backlight); -} diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h deleted file mode 100644 index d9abb7a60be5..000000000000 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_backlight.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * shmob_drm_backlight.h -- SH Mobile DRM Backlight - * - * Copyright (C) 2012 Renesas Electronics Corporation - * - * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - */ - -#ifndef __SHMOB_DRM_BACKLIGHT_H__ -#define __SHMOB_DRM_BACKLIGHT_H__ - -struct shmob_drm_connector; - -void shmob_drm_backlight_dpms(struct shmob_drm_connector *scon, int mode); -int shmob_drm_backlight_init(struct shmob_drm_connector *scon); -void shmob_drm_backlight_exit(struct shmob_drm_connector *scon); - -#endif /* __SHMOB_DRM_BACKLIGHT_H__ */ diff --git 
a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c index 2cdf8f9b06e5..db9d8d440144 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c @@ -7,7 +7,6 @@ * Laurent Pinchart (laurent.pinchart@ideasonboard.com) */ -#include #include #include @@ -24,7 +23,6 @@ #include #include -#include "shmob_drm_backlight.h" #include "shmob_drm_crtc.h" #include "shmob_drm_drv.h" #include "shmob_drm_kms.h" @@ -487,21 +485,9 @@ int shmob_drm_crtc_create(struct shmob_drm_device *sdev) * Encoder */ -#define to_shmob_encoder(e) \ - container_of(e, struct shmob_drm_encoder, encoder) - static void shmob_drm_encoder_dpms(struct drm_encoder *encoder, int mode) { - struct shmob_drm_encoder *senc = to_shmob_encoder(encoder); - struct shmob_drm_device *sdev = encoder->dev->dev_private; - struct shmob_drm_connector *scon = &sdev->connector; - - if (senc->dpms == mode) - return; - - shmob_drm_backlight_dpms(scon, mode); - - senc->dpms = mode; + /* No-op, everything is handled in the CRTC code. 
*/ } static bool shmob_drm_encoder_mode_fixup(struct drm_encoder *encoder, @@ -553,11 +539,9 @@ static const struct drm_encoder_helper_funcs encoder_helper_funcs = { int shmob_drm_encoder_create(struct shmob_drm_device *sdev) { - struct drm_encoder *encoder = &sdev->encoder.encoder; + struct drm_encoder *encoder = &sdev->encoder; int ret; - sdev->encoder.dpms = DRM_MODE_DPMS_OFF; - encoder->possible_crtcs = 1; ret = drm_simple_encoder_init(sdev->ddev, encoder, @@ -622,9 +606,6 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = { static void shmob_drm_connector_destroy(struct drm_connector *connector) { - struct shmob_drm_connector *scon = to_shmob_connector(connector); - - shmob_drm_backlight_exit(scon); drm_connector_unregister(connector); drm_connector_cleanup(connector); } @@ -653,13 +634,9 @@ int shmob_drm_connector_create(struct shmob_drm_device *sdev, drm_connector_helper_add(connector, &connector_helper_funcs); - ret = shmob_drm_backlight_init(&sdev->connector); - if (ret < 0) - goto err_cleanup; - ret = drm_connector_attach_encoder(connector, encoder); if (ret < 0) - goto err_backlight; + goto error; drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); drm_object_property_set_value(&connector->base, @@ -667,9 +644,7 @@ int shmob_drm_connector_create(struct shmob_drm_device *sdev, return 0; -err_backlight: - shmob_drm_backlight_exit(&sdev->connector); -err_cleanup: +error: drm_connector_cleanup(connector); return ret; } diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h index 21718843f46d..bce692626945 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h @@ -14,7 +14,6 @@ #include #include -struct backlight_device; struct drm_pending_vblank_event; struct shmob_drm_device; struct shmob_drm_format_info; @@ -31,16 +30,9 @@ struct shmob_drm_crtc { bool started; }; -struct shmob_drm_encoder { - struct 
drm_encoder encoder; - int dpms; -}; - struct shmob_drm_connector { struct drm_connector connector; struct drm_encoder *encoder; - - struct backlight_device *backlight; }; int shmob_drm_crtc_create(struct shmob_drm_device *sdev); diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h index 4964ddd5ab74..16d830168b2a 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h @@ -35,7 +35,7 @@ struct shmob_drm_device { struct drm_device *ddev; struct shmob_drm_crtc crtc; - struct shmob_drm_encoder encoder; + struct drm_encoder encoder; struct shmob_drm_connector connector; }; diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c index 8fd360149743..3051318ddc79 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c @@ -159,7 +159,7 @@ int shmob_drm_modeset_init(struct shmob_drm_device *sdev) shmob_drm_crtc_create(sdev); shmob_drm_encoder_create(sdev); - shmob_drm_connector_create(sdev, &sdev->encoder.encoder); + shmob_drm_connector_create(sdev, &sdev->encoder); drm_kms_helper_poll_init(sdev->ddev); diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index d661399b217d..b6b5b6607fb5 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -40,13 +40,6 @@ enum shmob_drm_interface { SHMOB_DRM_IFACE_SYS24, /* 24bpp */ }; -struct shmob_drm_backlight_data { - const char *name; - int max_brightness; - int (*get_brightness)(void); - int (*set_brightness)(int brightness); -}; - struct shmob_drm_panel_data { unsigned int width_mm; /* Panel width in mm */ unsigned int height_mm; /* Panel height in mm */ @@ -83,7 +76,6 @@ struct shmob_drm_platform_data { enum shmob_drm_clk_source clk_source; struct shmob_drm_interface_data iface; struct shmob_drm_panel_data 
panel; - struct shmob_drm_backlight_data backlight; }; #endif /* __SHMOB_DRM_H__ */ -- cgit v1.2.3 From 04ed052f3ab4b3c4c3e8451522ffaa84479bf0fb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 15 Sep 2023 10:53:30 +0200 Subject: drm: renesas: shmobile: Remove support for SYS panels SYS panels are not used, and have no defined DT bindings. Remove their support to avoid impeding DT support. It can always be added back later. Signed-off-by: Laurent Pinchart Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/4ccca2a5ac05c73ea9fd6e44b8bc443fd9d14e0d.1694767209.git.geert+renesas@glider.be --- drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c | 20 ------------------ drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c | 11 ---------- include/linux/platform_data/shmob_drm.h | 25 ----------------------- 3 files changed, 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c index 2ccb2fbfea26..015263ac2100 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c @@ -55,24 +55,6 @@ static void shmob_drm_crtc_setup_geometry(struct shmob_drm_crtc *scrtc) | ((idata->flags & SHMOB_DRM_IFACE_FL_DWCNT) ? LDMT1R_DWCNT : 0); lcdc_write(sdev, LDMT1R, value); - if (idata->interface >= SHMOB_DRM_IFACE_SYS8A && - idata->interface <= SHMOB_DRM_IFACE_SYS24) { - /* Setup SYS bus. */ - value = (idata->sys.cs_setup << LDMT2R_CSUP_SHIFT) - | (idata->sys.vsync_active_high ? LDMT2R_RSV : 0) - | (idata->sys.vsync_dir_input ? 
LDMT2R_VSEL : 0) - | (idata->sys.write_setup << LDMT2R_WCSC_SHIFT) - | (idata->sys.write_cycle << LDMT2R_WCEC_SHIFT) - | (idata->sys.write_strobe << LDMT2R_WCLW_SHIFT); - lcdc_write(sdev, LDMT2R, value); - - value = (idata->sys.read_latch << LDMT3R_RDLC_SHIFT) - | (idata->sys.read_setup << LDMT3R_RCSC_SHIFT) - | (idata->sys.read_cycle << LDMT3R_RCEC_SHIFT) - | (idata->sys.read_strobe << LDMT3R_RCLW_SHIFT); - lcdc_write(sdev, LDMT3R, value); - } - value = ((mode->hdisplay / 8) << 16) /* HDCN */ | (mode->htotal / 8); /* HTCN */ lcdc_write(sdev, LDHCNR, value); @@ -180,8 +162,6 @@ static void shmob_drm_crtc_start(struct shmob_drm_crtc *scrtc) lcdc_write(sdev, LDDCKSTPR, 0); lcdc_wait_bit(sdev, LDDCKSTPR, ~0, 0); - /* TODO: Setup SYS panel */ - /* Setup geometry, format, frame buffer memory and operation mode. */ shmob_drm_crtc_setup_geometry(scrtc); diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c index 5f3061e88e21..9c3d8b3cf57d 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c @@ -43,17 +43,6 @@ static int shmob_drm_init_interface(struct shmob_drm_device *sdev) [SHMOB_DRM_IFACE_RGB18] = LDMT1R_MIFTYP_RGB18, [SHMOB_DRM_IFACE_RGB24] = LDMT1R_MIFTYP_RGB24, [SHMOB_DRM_IFACE_YUV422] = LDMT1R_MIFTYP_YCBCR, - [SHMOB_DRM_IFACE_SYS8A] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8A, - [SHMOB_DRM_IFACE_SYS8B] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8B, - [SHMOB_DRM_IFACE_SYS8C] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8C, - [SHMOB_DRM_IFACE_SYS8D] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS8D, - [SHMOB_DRM_IFACE_SYS9] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS9, - [SHMOB_DRM_IFACE_SYS12] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS12, - [SHMOB_DRM_IFACE_SYS16A] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS16A, - [SHMOB_DRM_IFACE_SYS16B] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS16B, - [SHMOB_DRM_IFACE_SYS16C] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS16C, - [SHMOB_DRM_IFACE_SYS18] = LDMT1R_IFM | LDMT1R_MIFTYP_SYS18, - [SHMOB_DRM_IFACE_SYS24] 
= LDMT1R_IFM | LDMT1R_MIFTYP_SYS24, }; if (sdev->pdata->iface.interface >= ARRAY_SIZE(ldmt1r)) { diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index b6b5b6607fb5..b728e24222d9 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -27,17 +27,6 @@ enum shmob_drm_interface { SHMOB_DRM_IFACE_RGB18, /* 18bpp */ SHMOB_DRM_IFACE_RGB24, /* 24bpp */ SHMOB_DRM_IFACE_YUV422, /* 16bpp */ - SHMOB_DRM_IFACE_SYS8A, /* 24bpp, 8:8:8 */ - SHMOB_DRM_IFACE_SYS8B, /* 18bpp, 8:8:2 */ - SHMOB_DRM_IFACE_SYS8C, /* 18bpp, 2:8:8 */ - SHMOB_DRM_IFACE_SYS8D, /* 16bpp, 8:8 */ - SHMOB_DRM_IFACE_SYS9, /* 18bpp, 9:9 */ - SHMOB_DRM_IFACE_SYS12, /* 24bpp, 12:12 */ - SHMOB_DRM_IFACE_SYS16A, /* 16bpp */ - SHMOB_DRM_IFACE_SYS16B, /* 18bpp, 16:2 */ - SHMOB_DRM_IFACE_SYS16C, /* 18bpp, 2:16 */ - SHMOB_DRM_IFACE_SYS18, /* 18bpp */ - SHMOB_DRM_IFACE_SYS24, /* 24bpp */ }; struct shmob_drm_panel_data { @@ -46,19 +35,6 @@ struct shmob_drm_panel_data { struct drm_mode_modeinfo mode; }; -struct shmob_drm_sys_interface_data { - unsigned int read_latch:6; - unsigned int read_setup:8; - unsigned int read_cycle:8; - unsigned int read_strobe:8; - unsigned int write_setup:8; - unsigned int write_cycle:8; - unsigned int write_strobe:8; - unsigned int cs_setup:3; - unsigned int vsync_active_high:1; - unsigned int vsync_dir_input:1; -}; - #define SHMOB_DRM_IFACE_FL_DWPOL (1 << 0) /* Rising edge dot clock data latch */ #define SHMOB_DRM_IFACE_FL_DIPOL (1 << 1) /* Active low display enable */ #define SHMOB_DRM_IFACE_FL_DAPOL (1 << 2) /* Active low display data */ @@ -67,7 +43,6 @@ struct shmob_drm_sys_interface_data { struct shmob_drm_interface_data { enum shmob_drm_interface interface; - struct shmob_drm_sys_interface_data sys; unsigned int clk_div; unsigned int flags; }; -- cgit v1.2.3 From 6a6ab0c7162b4b10ce74347e282e1bc81103a48f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 15 Sep 2023 10:53:38 +0200 Subject: 
drm: renesas: shmobile: Use struct videomode in platform data Replace the drm_mode_modeinfo field with videomode that includes more signal polarity flags. This simplifies driver handling of panel modes and prepares for DT support. Signed-off-by: Laurent Pinchart [geert: Simplify] Reviewed-by: Laurent Pinchart Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/4312e56de424d94399c6105e7159317eae86c9d5.1694767209.git.geert+renesas@glider.be --- drivers/gpu/drm/renesas/shmobile/Kconfig | 1 + drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c | 35 +++++++++-------------- drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h | 3 ++ include/linux/platform_data/shmob_drm.h | 11 ++----- 4 files changed, 20 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/renesas/shmobile/Kconfig b/drivers/gpu/drm/renesas/shmobile/Kconfig index ba941587ca70..027220b8fe1c 100644 --- a/drivers/gpu/drm/renesas/shmobile/Kconfig +++ b/drivers/gpu/drm/renesas/shmobile/Kconfig @@ -6,6 +6,7 @@ config DRM_SHMOBILE select BACKLIGHT_CLASS_DEVICE select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER + select VIDEOMODE_HELPERS help Choose this option if you have an SH Mobile chipset. If M is selected the module will be called shmob-drm. diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c index f62ae047a48f..b3ef10b7828d 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c @@ -23,6 +23,8 @@ #include #include +#include